pegjs/lib/compiler/passes/generate-bytecode.js
David Majda 5a4d04fa90 Construct expectations using functions
Until now, expectations were constructed using object literals. This
commit changes the construction to use factory functions.

This change makes generated parsers slightly smaller because property
names don't have to be repeated many times and factory function calls
are more amenable to minifying.

Some numbers based on the aggregate size of parsers generated from
examples/*.pegjs:

  Optimization   Minified?   Size before   Size after   Saving
  ------------------------------------------------------------
  speed          no               719066       716063    0.42%
  speed          yes              188998       180202    4.65%
  size           no               194810       197813    1.52%
  size           yes              108782        99947    8.12%

(Minification was done using "uglify --mangle --compress" with
uglify-js 2.4.24.)
2016-06-18 06:57:09 +02:00

632 lines
16 KiB
JavaScript

"use strict";
var arrays = require("../../utils/arrays"),
objects = require("../../utils/objects"),
asts = require("../asts"),
visitor = require("../visitor"),
op = require("../opcodes"),
js = require("../js");
/* Generates bytecode.
*
* Instructions
* ============
*
* Stack Manipulation
* ------------------
*
* [0] PUSH c
*
* stack.push(consts[c]);
*
* [1] PUSH_UNDEFINED
*
* stack.push(undefined);
*
* [2] PUSH_NULL
*
* stack.push(null);
*
* [3] PUSH_FAILED
*
* stack.push(FAILED);
*
* [4] PUSH_EMPTY_ARRAY
*
* stack.push([]);
*
* [5] PUSH_CURR_POS
*
* stack.push(currPos);
*
* [6] POP
*
* stack.pop();
*
* [7] POP_CURR_POS
*
* currPos = stack.pop();
*
* [8] POP_N n
*
* stack.pop(n);
*
* [9] NIP
*
* value = stack.pop();
* stack.pop();
* stack.push(value);
*
* [10] APPEND
*
* value = stack.pop();
* array = stack.pop();
* array.push(value);
* stack.push(array);
*
* [11] WRAP n
*
* stack.push(stack.pop(n));
*
* [12] TEXT
*
* stack.push(input.substring(stack.pop(), currPos));
*
* Conditions and Loops
* --------------------
*
* [13] IF t, f
*
* if (stack.top()) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [14] IF_ERROR t, f
*
* if (stack.top() === FAILED) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [15] IF_NOT_ERROR t, f
*
* if (stack.top() !== FAILED) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [16] WHILE_NOT_ERROR b
*
* while(stack.top() !== FAILED) {
* interpret(ip + 2, ip + 2 + b);
* }
*
* Matching
* --------
*
* [17] MATCH_ANY a, f, ...
*
* if (input.length > currPos) {
* interpret(ip + 3, ip + 3 + a);
* } else {
* interpret(ip + 3 + a, ip + 3 + a + f);
* }
*
* [18] MATCH_STRING s, a, f, ...
*
* if (input.substr(currPos, consts[s].length) === consts[s]) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [19] MATCH_STRING_IC s, a, f, ...
*
* if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [20] MATCH_REGEXP r, a, f, ...
*
* if (consts[r].test(input.charAt(currPos))) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [21] ACCEPT_N n
*
* stack.push(input.substring(currPos, n));
* currPos += n;
*
* [22] ACCEPT_STRING s
*
* stack.push(consts[s]);
* currPos += consts[s].length;
*
* [23] FAIL e
*
* stack.push(FAILED);
* fail(consts[e]);
*
* Calls
* -----
*
* [24] LOAD_SAVED_POS p
*
* savedPos = stack[p];
*
* [25] UPDATE_SAVED_POS
*
* savedPos = currPos;
*
* [26] CALL f, n, pc, p1, p2, ..., pN
*
* value = consts[f](stack[p1], ..., stack[pN]);
* stack.pop(n);
* stack.push(value);
*
* Rules
* -----
*
* [27] RULE r
*
* stack.push(parseRule(r));
*
* Failure Reporting
* -----------------
*
* [28] SILENT_FAILS_ON
*
* silentFails++;
*
* [29] SILENT_FAILS_OFF
*
* silentFails--;
*/
function generateBytecode(ast) {
var consts = [];
function addConst(value) {
var index = arrays.indexOf(consts, value);
return index === -1 ? consts.push(value) - 1 : index;
}
function addFunctionConst(params, code) {
return addConst(
"function(" + params.join(", ") + ") {" + code + "}"
);
}
function buildSequence() {
return Array.prototype.concat.apply([], arguments);
}
function buildCondition(condCode, thenCode, elseCode) {
return condCode.concat(
[thenCode.length, elseCode.length],
thenCode,
elseCode
);
}
function buildLoop(condCode, bodyCode) {
return condCode.concat([bodyCode.length], bodyCode);
}
function buildCall(functionIndex, delta, env, sp) {
var params = arrays.map(objects.values(env), function(p) { return sp - p; });
return [op.CALL, functionIndex, delta, params.length].concat(params);
}
function buildSimplePredicate(expression, negative, context) {
return buildSequence(
[op.PUSH_CURR_POS],
[op.SILENT_FAILS_ON],
generate(expression, {
sp: context.sp + 1,
env: objects.clone(context.env),
action: null
}),
[op.SILENT_FAILS_OFF],
buildCondition(
[negative ? op.IF_ERROR : op.IF_NOT_ERROR],
buildSequence(
[op.POP],
[negative ? op.POP : op.POP_CURR_POS],
[op.PUSH_UNDEFINED]
),
buildSequence(
[op.POP],
[negative ? op.POP_CURR_POS : op.POP],
[op.PUSH_FAILED]
)
)
);
}
function buildSemanticPredicate(code, negative, context) {
var functionIndex = addFunctionConst(objects.keys(context.env), code);
return buildSequence(
[op.UPDATE_SAVED_POS],
buildCall(functionIndex, 0, context.env, context.sp),
buildCondition(
[op.IF],
buildSequence(
[op.POP],
negative ? [op.PUSH_FAILED] : [op.PUSH_UNDEFINED]
),
buildSequence(
[op.POP],
negative ? [op.PUSH_UNDEFINED] : [op.PUSH_FAILED]
)
)
);
}
function buildAppendLoop(expressionCode) {
return buildLoop(
[op.WHILE_NOT_ERROR],
buildSequence([op.APPEND], expressionCode)
);
}
var generate = visitor.build({
grammar: function(node) {
arrays.each(node.rules, generate);
node.consts = consts;
},
rule: function(node) {
node.bytecode = generate(node.expression, {
sp: -1, // stack pointer
env: { }, // mapping of label names to stack positions
action: null // action nodes pass themselves to children here
});
},
named: function(node, context) {
var nameIndex = addConst(
'peg$otherExpectation("' + js.stringEscape(node.name) + '")'
);
/*
* The code generated below is slightly suboptimal because |FAIL| pushes
* to the stack, so we need to stick a |POP| in front of it. We lack a
* dedicated instruction that would just report the failure and not touch
* the stack.
*/
return buildSequence(
[op.SILENT_FAILS_ON],
generate(node.expression, context),
[op.SILENT_FAILS_OFF],
buildCondition([op.IF_ERROR], [op.FAIL, nameIndex], [])
);
},
choice: function(node, context) {
function buildAlternativesCode(alternatives, context) {
return buildSequence(
generate(alternatives[0], {
sp: context.sp,
env: objects.clone(context.env),
action: null
}),
alternatives.length > 1
? buildCondition(
[op.IF_ERROR],
buildSequence(
[op.POP],
buildAlternativesCode(alternatives.slice(1), context)
),
[]
)
: []
);
}
return buildAlternativesCode(node.alternatives, context);
},
action: function(node, context) {
var env = objects.clone(context.env),
emitCall = node.expression.type !== "sequence"
|| node.expression.elements.length === 0,
expressionCode = generate(node.expression, {
sp: context.sp + (emitCall ? 1 : 0),
env: env,
action: node
}),
functionIndex = addFunctionConst(objects.keys(env), node.code);
return emitCall
? buildSequence(
[op.PUSH_CURR_POS],
expressionCode,
buildCondition(
[op.IF_NOT_ERROR],
buildSequence(
[op.LOAD_SAVED_POS, 1],
buildCall(functionIndex, 1, env, context.sp + 2)
),
[]
),
[op.NIP]
)
: expressionCode;
},
sequence: function(node, context) {
function buildElementsCode(elements, context) {
var processedCount, functionIndex;
if (elements.length > 0) {
processedCount = node.elements.length - elements.slice(1).length;
return buildSequence(
generate(elements[0], {
sp: context.sp,
env: context.env,
action: null
}),
buildCondition(
[op.IF_NOT_ERROR],
buildElementsCode(elements.slice(1), {
sp: context.sp + 1,
env: context.env,
action: context.action
}),
buildSequence(
processedCount > 1 ? [op.POP_N, processedCount] : [op.POP],
[op.POP_CURR_POS],
[op.PUSH_FAILED]
)
)
);
} else {
if (context.action) {
functionIndex = addFunctionConst(
objects.keys(context.env),
context.action.code
);
return buildSequence(
[op.LOAD_SAVED_POS, node.elements.length],
buildCall(
functionIndex,
node.elements.length,
context.env,
context.sp
),
[op.NIP]
);
} else {
return buildSequence([op.WRAP, node.elements.length], [op.NIP]);
}
}
}
return buildSequence(
[op.PUSH_CURR_POS],
buildElementsCode(node.elements, {
sp: context.sp + 1,
env: context.env,
action: context.action
})
);
},
labeled: function(node, context) {
var env = objects.clone(context.env);
context.env[node.label] = context.sp + 1;
return generate(node.expression, {
sp: context.sp,
env: env,
action: null
});
},
text: function(node, context) {
return buildSequence(
[op.PUSH_CURR_POS],
generate(node.expression, {
sp: context.sp + 1,
env: objects.clone(context.env),
action: null
}),
buildCondition(
[op.IF_NOT_ERROR],
buildSequence([op.POP], [op.TEXT]),
[op.NIP]
)
);
},
simple_and: function(node, context) {
return buildSimplePredicate(node.expression, false, context);
},
simple_not: function(node, context) {
return buildSimplePredicate(node.expression, true, context);
},
optional: function(node, context) {
return buildSequence(
generate(node.expression, {
sp: context.sp,
env: objects.clone(context.env),
action: null
}),
buildCondition(
[op.IF_ERROR],
buildSequence([op.POP], [op.PUSH_NULL]),
[]
)
);
},
zero_or_more: function(node, context) {
var expressionCode = generate(node.expression, {
sp: context.sp + 1,
env: objects.clone(context.env),
action: null
});
return buildSequence(
[op.PUSH_EMPTY_ARRAY],
expressionCode,
buildAppendLoop(expressionCode),
[op.POP]
);
},
one_or_more: function(node, context) {
var expressionCode = generate(node.expression, {
sp: context.sp + 1,
env: objects.clone(context.env),
action: null
});
return buildSequence(
[op.PUSH_EMPTY_ARRAY],
expressionCode,
buildCondition(
[op.IF_NOT_ERROR],
buildSequence(buildAppendLoop(expressionCode), [op.POP]),
buildSequence([op.POP], [op.POP], [op.PUSH_FAILED])
)
);
},
group: function(node, context) {
return generate(node.expression, {
sp: context.sp,
env: objects.clone(context.env),
action: null
});
},
semantic_and: function(node, context) {
return buildSemanticPredicate(node.code, false, context);
},
semantic_not: function(node, context) {
return buildSemanticPredicate(node.code, true, context);
},
rule_ref: function(node) {
return [op.RULE, asts.indexOfRule(ast, node.name)];
},
literal: function(node) {
var stringIndex, expectedIndex;
if (node.value.length > 0) {
stringIndex = addConst('"'
+ js.stringEscape(
node.ignoreCase ? node.value.toLowerCase() : node.value
)
+ '"'
);
expectedIndex = addConst(
'peg$literalExpectation('
+ '"' + js.stringEscape(node.value) + '", '
+ node.ignoreCase
+ ')'
);
/*
* For case-sensitive strings the value must match the beginning of the
* remaining input exactly. As a result, we can use |ACCEPT_STRING| and
* save one |substr| call that would be needed if we used |ACCEPT_N|.
*/
return buildCondition(
node.ignoreCase
? [op.MATCH_STRING_IC, stringIndex]
: [op.MATCH_STRING, stringIndex],
node.ignoreCase
? [op.ACCEPT_N, node.value.length]
: [op.ACCEPT_STRING, stringIndex],
[op.FAIL, expectedIndex]
);
} else {
stringIndex = addConst('""');
return [op.PUSH, stringIndex];
}
},
"class": function(node) {
var regexp, parts, regexpIndex, expectedIndex;
if (node.parts.length > 0) {
regexp = '/^['
+ (node.inverted ? '^' : '')
+ arrays.map(node.parts, function(part) {
return part instanceof Array
? js.regexpClassEscape(part[0])
+ '-'
+ js.regexpClassEscape(part[1])
: js.regexpClassEscape(part);
}).join('')
+ ']/' + (node.ignoreCase ? 'i' : '');
} else {
/*
* IE considers regexps /[]/ and /[^]/ as syntactically invalid, so we
* translate them into equivalents it can handle.
*/
regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
}
parts = '['
+ arrays.map(node.parts, function(part) {
return part instanceof Array
? '["' + js.stringEscape(part[0]) + '", "' + js.stringEscape(part[1]) + '"]'
: '"' + js.stringEscape(part) + '"';
}).join(', ')
+ ']';
regexpIndex = addConst(regexp);
expectedIndex = addConst(
'peg$classExpectation('
+ parts + ', '
+ node.inverted + ', '
+ node.ignoreCase
+ ')'
);
return buildCondition(
[op.MATCH_REGEXP, regexpIndex],
[op.ACCEPT_N, 1],
[op.FAIL, expectedIndex]
);
},
any: function() {
var expectedIndex = addConst('peg$anyExpectation()');
return buildCondition(
[op.MATCH_ANY],
[op.ACCEPT_N, 1],
[op.FAIL, expectedIndex]
);
}
});
generate(ast);
}
module.exports = generateBytecode;