5a4d04fa90
Until now, expectations were constructed using object literals. This commit changes the construction to use factory functions. This change makes generated parsers slightly smaller because property names don't have to be repeated many times and factory function calls are more amenable to minifying. Some numbers based on the aggregate size of parsers generated from examples/*.pegjs: Optimization Minified? Size before Size after Saving ------------------------------------------------------------ speed no 719066 716063 0.42% speed yes 188998 180202 4.65% size no 194810 197813 1.52% size yes 108782 99947 8.12% (Minification was done using "uglify --mangle --compress" with uglify-js 2.4.24.)
632 lines
16 KiB
JavaScript
632 lines
16 KiB
JavaScript
"use strict";
|
|
|
|
var arrays = require("../../utils/arrays"),
|
|
objects = require("../../utils/objects"),
|
|
asts = require("../asts"),
|
|
visitor = require("../visitor"),
|
|
op = require("../opcodes"),
|
|
js = require("../js");
|
|
|
|
/* Generates bytecode.
|
|
*
|
|
* Instructions
|
|
* ============
|
|
*
|
|
* Stack Manipulation
|
|
* ------------------
|
|
*
|
|
* [0] PUSH c
|
|
*
|
|
* stack.push(consts[c]);
|
|
*
|
|
* [1] PUSH_UNDEFINED
|
|
*
|
|
* stack.push(undefined);
|
|
*
|
|
* [2] PUSH_NULL
|
|
*
|
|
* stack.push(null);
|
|
*
|
|
* [3] PUSH_FAILED
|
|
*
|
|
* stack.push(FAILED);
|
|
*
|
|
* [4] PUSH_EMPTY_ARRAY
|
|
*
|
|
* stack.push([]);
|
|
*
|
|
* [5] PUSH_CURR_POS
|
|
*
|
|
* stack.push(currPos);
|
|
*
|
|
* [6] POP
|
|
*
|
|
* stack.pop();
|
|
*
|
|
* [7] POP_CURR_POS
|
|
*
|
|
* currPos = stack.pop();
|
|
*
|
|
* [8] POP_N n
|
|
*
|
|
* stack.pop(n);
|
|
*
|
|
* [9] NIP
|
|
*
|
|
* value = stack.pop();
|
|
* stack.pop();
|
|
* stack.push(value);
|
|
*
|
|
* [10] APPEND
|
|
*
|
|
* value = stack.pop();
|
|
* array = stack.pop();
|
|
* array.push(value);
|
|
* stack.push(array);
|
|
*
|
|
* [11] WRAP n
|
|
*
|
|
* stack.push(stack.pop(n));
|
|
*
|
|
* [12] TEXT
|
|
*
|
|
* stack.push(input.substring(stack.pop(), currPos));
|
|
*
|
|
* Conditions and Loops
|
|
* --------------------
|
|
*
|
|
* [13] IF t, f
|
|
*
|
|
* if (stack.top()) {
|
|
* interpret(ip + 3, ip + 3 + t);
|
|
* } else {
|
|
* interpret(ip + 3 + t, ip + 3 + t + f);
|
|
* }
|
|
*
|
|
* [14] IF_ERROR t, f
|
|
*
|
|
* if (stack.top() === FAILED) {
|
|
* interpret(ip + 3, ip + 3 + t);
|
|
* } else {
|
|
* interpret(ip + 3 + t, ip + 3 + t + f);
|
|
* }
|
|
*
|
|
* [15] IF_NOT_ERROR t, f
|
|
*
|
|
* if (stack.top() !== FAILED) {
|
|
* interpret(ip + 3, ip + 3 + t);
|
|
* } else {
|
|
* interpret(ip + 3 + t, ip + 3 + t + f);
|
|
* }
|
|
*
|
|
* [16] WHILE_NOT_ERROR b
|
|
*
|
|
* while(stack.top() !== FAILED) {
|
|
* interpret(ip + 2, ip + 2 + b);
|
|
* }
|
|
*
|
|
* Matching
|
|
* --------
|
|
*
|
|
* [17] MATCH_ANY a, f, ...
|
|
*
|
|
* if (input.length > currPos) {
|
|
* interpret(ip + 3, ip + 3 + a);
|
|
* } else {
|
|
* interpret(ip + 3 + a, ip + 3 + a + f);
|
|
* }
|
|
*
|
|
* [18] MATCH_STRING s, a, f, ...
|
|
*
|
|
* if (input.substr(currPos, consts[s].length) === consts[s]) {
|
|
* interpret(ip + 4, ip + 4 + a);
|
|
* } else {
|
|
* interpret(ip + 4 + a, ip + 4 + a + f);
|
|
* }
|
|
*
|
|
* [19] MATCH_STRING_IC s, a, f, ...
|
|
*
|
|
* if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) {
|
|
* interpret(ip + 4, ip + 4 + a);
|
|
* } else {
|
|
* interpret(ip + 4 + a, ip + 4 + a + f);
|
|
* }
|
|
*
|
|
* [20] MATCH_REGEXP r, a, f, ...
|
|
*
|
|
* if (consts[r].test(input.charAt(currPos))) {
|
|
* interpret(ip + 4, ip + 4 + a);
|
|
* } else {
|
|
* interpret(ip + 4 + a, ip + 4 + a + f);
|
|
* }
|
|
*
|
|
* [21] ACCEPT_N n
|
|
*
|
|
* stack.push(input.substring(currPos, n));
|
|
* currPos += n;
|
|
*
|
|
* [22] ACCEPT_STRING s
|
|
*
|
|
* stack.push(consts[s]);
|
|
* currPos += consts[s].length;
|
|
*
|
|
* [23] FAIL e
|
|
*
|
|
* stack.push(FAILED);
|
|
* fail(consts[e]);
|
|
*
|
|
* Calls
|
|
* -----
|
|
*
|
|
* [24] LOAD_SAVED_POS p
|
|
*
|
|
* savedPos = stack[p];
|
|
*
|
|
* [25] UPDATE_SAVED_POS
|
|
*
|
|
* savedPos = currPos;
|
|
*
|
|
* [26] CALL f, n, pc, p1, p2, ..., pN
|
|
*
|
|
* value = consts[f](stack[p1], ..., stack[pN]);
|
|
* stack.pop(n);
|
|
* stack.push(value);
|
|
*
|
|
* Rules
|
|
* -----
|
|
*
|
|
* [27] RULE r
|
|
*
|
|
* stack.push(parseRule(r));
|
|
*
|
|
* Failure Reporting
|
|
* -----------------
|
|
*
|
|
* [28] SILENT_FAILS_ON
|
|
*
|
|
* silentFails++;
|
|
*
|
|
* [29] SILENT_FAILS_OFF
|
|
*
|
|
* silentFails--;
|
|
*/
|
|
function generateBytecode(ast) {
|
|
var consts = [];
|
|
|
|
function addConst(value) {
|
|
var index = arrays.indexOf(consts, value);
|
|
|
|
return index === -1 ? consts.push(value) - 1 : index;
|
|
}
|
|
|
|
function addFunctionConst(params, code) {
|
|
return addConst(
|
|
"function(" + params.join(", ") + ") {" + code + "}"
|
|
);
|
|
}
|
|
|
|
function buildSequence() {
|
|
return Array.prototype.concat.apply([], arguments);
|
|
}
|
|
|
|
function buildCondition(condCode, thenCode, elseCode) {
|
|
return condCode.concat(
|
|
[thenCode.length, elseCode.length],
|
|
thenCode,
|
|
elseCode
|
|
);
|
|
}
|
|
|
|
function buildLoop(condCode, bodyCode) {
|
|
return condCode.concat([bodyCode.length], bodyCode);
|
|
}
|
|
|
|
function buildCall(functionIndex, delta, env, sp) {
|
|
var params = arrays.map(objects.values(env), function(p) { return sp - p; });
|
|
|
|
return [op.CALL, functionIndex, delta, params.length].concat(params);
|
|
}
|
|
|
|
function buildSimplePredicate(expression, negative, context) {
|
|
return buildSequence(
|
|
[op.PUSH_CURR_POS],
|
|
[op.SILENT_FAILS_ON],
|
|
generate(expression, {
|
|
sp: context.sp + 1,
|
|
env: objects.clone(context.env),
|
|
action: null
|
|
}),
|
|
[op.SILENT_FAILS_OFF],
|
|
buildCondition(
|
|
[negative ? op.IF_ERROR : op.IF_NOT_ERROR],
|
|
buildSequence(
|
|
[op.POP],
|
|
[negative ? op.POP : op.POP_CURR_POS],
|
|
[op.PUSH_UNDEFINED]
|
|
),
|
|
buildSequence(
|
|
[op.POP],
|
|
[negative ? op.POP_CURR_POS : op.POP],
|
|
[op.PUSH_FAILED]
|
|
)
|
|
)
|
|
);
|
|
}
|
|
|
|
function buildSemanticPredicate(code, negative, context) {
|
|
var functionIndex = addFunctionConst(objects.keys(context.env), code);
|
|
|
|
return buildSequence(
|
|
[op.UPDATE_SAVED_POS],
|
|
buildCall(functionIndex, 0, context.env, context.sp),
|
|
buildCondition(
|
|
[op.IF],
|
|
buildSequence(
|
|
[op.POP],
|
|
negative ? [op.PUSH_FAILED] : [op.PUSH_UNDEFINED]
|
|
),
|
|
buildSequence(
|
|
[op.POP],
|
|
negative ? [op.PUSH_UNDEFINED] : [op.PUSH_FAILED]
|
|
)
|
|
)
|
|
);
|
|
}
|
|
|
|
function buildAppendLoop(expressionCode) {
|
|
return buildLoop(
|
|
[op.WHILE_NOT_ERROR],
|
|
buildSequence([op.APPEND], expressionCode)
|
|
);
|
|
}
|
|
|
|
var generate = visitor.build({
|
|
grammar: function(node) {
|
|
arrays.each(node.rules, generate);
|
|
|
|
node.consts = consts;
|
|
},
|
|
|
|
rule: function(node) {
|
|
node.bytecode = generate(node.expression, {
|
|
sp: -1, // stack pointer
|
|
env: { }, // mapping of label names to stack positions
|
|
action: null // action nodes pass themselves to children here
|
|
});
|
|
},
|
|
|
|
named: function(node, context) {
|
|
var nameIndex = addConst(
|
|
'peg$otherExpectation("' + js.stringEscape(node.name) + '")'
|
|
);
|
|
|
|
/*
|
|
* The code generated below is slightly suboptimal because |FAIL| pushes
|
|
* to the stack, so we need to stick a |POP| in front of it. We lack a
|
|
* dedicated instruction that would just report the failure and not touch
|
|
* the stack.
|
|
*/
|
|
return buildSequence(
|
|
[op.SILENT_FAILS_ON],
|
|
generate(node.expression, context),
|
|
[op.SILENT_FAILS_OFF],
|
|
buildCondition([op.IF_ERROR], [op.FAIL, nameIndex], [])
|
|
);
|
|
},
|
|
|
|
choice: function(node, context) {
|
|
function buildAlternativesCode(alternatives, context) {
|
|
return buildSequence(
|
|
generate(alternatives[0], {
|
|
sp: context.sp,
|
|
env: objects.clone(context.env),
|
|
action: null
|
|
}),
|
|
alternatives.length > 1
|
|
? buildCondition(
|
|
[op.IF_ERROR],
|
|
buildSequence(
|
|
[op.POP],
|
|
buildAlternativesCode(alternatives.slice(1), context)
|
|
),
|
|
[]
|
|
)
|
|
: []
|
|
);
|
|
}
|
|
|
|
return buildAlternativesCode(node.alternatives, context);
|
|
},
|
|
|
|
action: function(node, context) {
|
|
var env = objects.clone(context.env),
|
|
emitCall = node.expression.type !== "sequence"
|
|
|| node.expression.elements.length === 0,
|
|
expressionCode = generate(node.expression, {
|
|
sp: context.sp + (emitCall ? 1 : 0),
|
|
env: env,
|
|
action: node
|
|
}),
|
|
functionIndex = addFunctionConst(objects.keys(env), node.code);
|
|
|
|
return emitCall
|
|
? buildSequence(
|
|
[op.PUSH_CURR_POS],
|
|
expressionCode,
|
|
buildCondition(
|
|
[op.IF_NOT_ERROR],
|
|
buildSequence(
|
|
[op.LOAD_SAVED_POS, 1],
|
|
buildCall(functionIndex, 1, env, context.sp + 2)
|
|
),
|
|
[]
|
|
),
|
|
[op.NIP]
|
|
)
|
|
: expressionCode;
|
|
},
|
|
|
|
sequence: function(node, context) {
|
|
function buildElementsCode(elements, context) {
|
|
var processedCount, functionIndex;
|
|
|
|
if (elements.length > 0) {
|
|
processedCount = node.elements.length - elements.slice(1).length;
|
|
|
|
return buildSequence(
|
|
generate(elements[0], {
|
|
sp: context.sp,
|
|
env: context.env,
|
|
action: null
|
|
}),
|
|
buildCondition(
|
|
[op.IF_NOT_ERROR],
|
|
buildElementsCode(elements.slice(1), {
|
|
sp: context.sp + 1,
|
|
env: context.env,
|
|
action: context.action
|
|
}),
|
|
buildSequence(
|
|
processedCount > 1 ? [op.POP_N, processedCount] : [op.POP],
|
|
[op.POP_CURR_POS],
|
|
[op.PUSH_FAILED]
|
|
)
|
|
)
|
|
);
|
|
} else {
|
|
if (context.action) {
|
|
functionIndex = addFunctionConst(
|
|
objects.keys(context.env),
|
|
context.action.code
|
|
);
|
|
|
|
return buildSequence(
|
|
[op.LOAD_SAVED_POS, node.elements.length],
|
|
buildCall(
|
|
functionIndex,
|
|
node.elements.length,
|
|
context.env,
|
|
context.sp
|
|
),
|
|
[op.NIP]
|
|
);
|
|
} else {
|
|
return buildSequence([op.WRAP, node.elements.length], [op.NIP]);
|
|
}
|
|
}
|
|
}
|
|
|
|
return buildSequence(
|
|
[op.PUSH_CURR_POS],
|
|
buildElementsCode(node.elements, {
|
|
sp: context.sp + 1,
|
|
env: context.env,
|
|
action: context.action
|
|
})
|
|
);
|
|
},
|
|
|
|
labeled: function(node, context) {
|
|
var env = objects.clone(context.env);
|
|
|
|
context.env[node.label] = context.sp + 1;
|
|
|
|
return generate(node.expression, {
|
|
sp: context.sp,
|
|
env: env,
|
|
action: null
|
|
});
|
|
},
|
|
|
|
text: function(node, context) {
|
|
return buildSequence(
|
|
[op.PUSH_CURR_POS],
|
|
generate(node.expression, {
|
|
sp: context.sp + 1,
|
|
env: objects.clone(context.env),
|
|
action: null
|
|
}),
|
|
buildCondition(
|
|
[op.IF_NOT_ERROR],
|
|
buildSequence([op.POP], [op.TEXT]),
|
|
[op.NIP]
|
|
)
|
|
);
|
|
},
|
|
|
|
simple_and: function(node, context) {
|
|
return buildSimplePredicate(node.expression, false, context);
|
|
},
|
|
|
|
simple_not: function(node, context) {
|
|
return buildSimplePredicate(node.expression, true, context);
|
|
},
|
|
|
|
optional: function(node, context) {
|
|
return buildSequence(
|
|
generate(node.expression, {
|
|
sp: context.sp,
|
|
env: objects.clone(context.env),
|
|
action: null
|
|
}),
|
|
buildCondition(
|
|
[op.IF_ERROR],
|
|
buildSequence([op.POP], [op.PUSH_NULL]),
|
|
[]
|
|
)
|
|
);
|
|
},
|
|
|
|
zero_or_more: function(node, context) {
|
|
var expressionCode = generate(node.expression, {
|
|
sp: context.sp + 1,
|
|
env: objects.clone(context.env),
|
|
action: null
|
|
});
|
|
|
|
return buildSequence(
|
|
[op.PUSH_EMPTY_ARRAY],
|
|
expressionCode,
|
|
buildAppendLoop(expressionCode),
|
|
[op.POP]
|
|
);
|
|
},
|
|
|
|
one_or_more: function(node, context) {
|
|
var expressionCode = generate(node.expression, {
|
|
sp: context.sp + 1,
|
|
env: objects.clone(context.env),
|
|
action: null
|
|
});
|
|
|
|
return buildSequence(
|
|
[op.PUSH_EMPTY_ARRAY],
|
|
expressionCode,
|
|
buildCondition(
|
|
[op.IF_NOT_ERROR],
|
|
buildSequence(buildAppendLoop(expressionCode), [op.POP]),
|
|
buildSequence([op.POP], [op.POP], [op.PUSH_FAILED])
|
|
)
|
|
);
|
|
},
|
|
|
|
group: function(node, context) {
|
|
return generate(node.expression, {
|
|
sp: context.sp,
|
|
env: objects.clone(context.env),
|
|
action: null
|
|
});
|
|
},
|
|
|
|
semantic_and: function(node, context) {
|
|
return buildSemanticPredicate(node.code, false, context);
|
|
},
|
|
|
|
semantic_not: function(node, context) {
|
|
return buildSemanticPredicate(node.code, true, context);
|
|
},
|
|
|
|
rule_ref: function(node) {
|
|
return [op.RULE, asts.indexOfRule(ast, node.name)];
|
|
},
|
|
|
|
literal: function(node) {
|
|
var stringIndex, expectedIndex;
|
|
|
|
if (node.value.length > 0) {
|
|
stringIndex = addConst('"'
|
|
+ js.stringEscape(
|
|
node.ignoreCase ? node.value.toLowerCase() : node.value
|
|
)
|
|
+ '"'
|
|
);
|
|
expectedIndex = addConst(
|
|
'peg$literalExpectation('
|
|
+ '"' + js.stringEscape(node.value) + '", '
|
|
+ node.ignoreCase
|
|
+ ')'
|
|
);
|
|
|
|
/*
|
|
* For case-sensitive strings the value must match the beginning of the
|
|
* remaining input exactly. As a result, we can use |ACCEPT_STRING| and
|
|
* save one |substr| call that would be needed if we used |ACCEPT_N|.
|
|
*/
|
|
return buildCondition(
|
|
node.ignoreCase
|
|
? [op.MATCH_STRING_IC, stringIndex]
|
|
: [op.MATCH_STRING, stringIndex],
|
|
node.ignoreCase
|
|
? [op.ACCEPT_N, node.value.length]
|
|
: [op.ACCEPT_STRING, stringIndex],
|
|
[op.FAIL, expectedIndex]
|
|
);
|
|
} else {
|
|
stringIndex = addConst('""');
|
|
|
|
return [op.PUSH, stringIndex];
|
|
}
|
|
},
|
|
|
|
"class": function(node) {
|
|
var regexp, parts, regexpIndex, expectedIndex;
|
|
|
|
if (node.parts.length > 0) {
|
|
regexp = '/^['
|
|
+ (node.inverted ? '^' : '')
|
|
+ arrays.map(node.parts, function(part) {
|
|
return part instanceof Array
|
|
? js.regexpClassEscape(part[0])
|
|
+ '-'
|
|
+ js.regexpClassEscape(part[1])
|
|
: js.regexpClassEscape(part);
|
|
}).join('')
|
|
+ ']/' + (node.ignoreCase ? 'i' : '');
|
|
} else {
|
|
/*
|
|
* IE considers regexps /[]/ and /[^]/ as syntactically invalid, so we
|
|
* translate them into equivalents it can handle.
|
|
*/
|
|
regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
|
|
}
|
|
|
|
parts = '['
|
|
+ arrays.map(node.parts, function(part) {
|
|
return part instanceof Array
|
|
? '["' + js.stringEscape(part[0]) + '", "' + js.stringEscape(part[1]) + '"]'
|
|
: '"' + js.stringEscape(part) + '"';
|
|
}).join(', ')
|
|
+ ']';
|
|
|
|
regexpIndex = addConst(regexp);
|
|
expectedIndex = addConst(
|
|
'peg$classExpectation('
|
|
+ parts + ', '
|
|
+ node.inverted + ', '
|
|
+ node.ignoreCase
|
|
+ ')'
|
|
);
|
|
|
|
return buildCondition(
|
|
[op.MATCH_REGEXP, regexpIndex],
|
|
[op.ACCEPT_N, 1],
|
|
[op.FAIL, expectedIndex]
|
|
);
|
|
},
|
|
|
|
any: function() {
|
|
var expectedIndex = addConst('peg$anyExpectation()');
|
|
|
|
return buildCondition(
|
|
[op.MATCH_ANY],
|
|
[op.ACCEPT_N, 1],
|
|
[op.FAIL, expectedIndex]
|
|
);
|
|
}
|
|
});
|
|
|
|
generate(ast);
|
|
}
|
|
|
|
module.exports = generateBytecode;
|