dad1207c46
The TEXT instruction now replaces position at the top of the stack with the input from that position until the current position. This is simpler and cleaner semantics than the previous one, where TEXT also popped an additional value from the stack and kept the position there.
615 lines
15 KiB
JavaScript
615 lines
15 KiB
JavaScript
var arrays = require("../../utils/arrays"),
|
|
objects = require("../../utils/objects"),
|
|
asts = require("../asts"),
|
|
visitor = require("../visitor"),
|
|
op = require("../opcodes"),
|
|
js = require("../javascript");
|
|
|
|
/* Generates bytecode.
|
|
*
|
|
* Instructions
|
|
* ============
|
|
*
|
|
* Stack Manipulation
|
|
* ------------------
|
|
*
|
|
* [0] PUSH c
|
|
*
|
|
* stack.push(consts[c]);
|
|
*
|
|
* [26] PUSH_UNDEFINED
|
|
*
|
|
* stack.push(undefined);
|
|
*
|
|
* [27] PUSH_NULL
|
|
*
|
|
* stack.push(null);
|
|
*
|
|
* [28] PUSH_FAILED
|
|
*
|
|
* stack.push(FAILED);
|
|
*
|
|
* [29] PUSH_EMPTY_ARRAY
|
|
*
|
|
* stack.push([]);
|
|
*
|
|
* [1] PUSH_CURR_POS
|
|
*
|
|
* stack.push(currPos);
|
|
*
|
|
* [2] POP
|
|
*
|
|
* stack.pop();
|
|
*
|
|
* [3] POP_CURR_POS
|
|
*
|
|
* currPos = stack.pop();
|
|
*
|
|
* [4] POP_N n
|
|
*
|
|
* stack.pop(n);
|
|
*
|
|
* [5] NIP
|
|
*
|
|
* value = stack.pop();
|
|
* stack.pop();
|
|
* stack.push(value);
|
|
*
|
|
* [6] APPEND
|
|
*
|
|
* value = stack.pop();
|
|
* array = stack.pop();
|
|
* array.push(value);
|
|
* stack.push(array);
|
|
*
|
|
* [7] WRAP n
|
|
*
|
|
* stack.push(stack.pop(n));
|
|
*
|
|
* [8] TEXT
|
|
*
|
|
* stack.push(input.substring(stack.pop(), currPos));
|
|
*
|
|
* Conditions and Loops
|
|
* --------------------
|
|
*
|
|
* [9] IF t, f
|
|
*
|
|
* if (stack.top()) {
|
|
* interpret(ip + 3, ip + 3 + t);
|
|
* } else {
|
|
* interpret(ip + 3 + t, ip + 3 + t + f);
|
|
* }
|
|
*
|
|
* [10] IF_ERROR t, f
|
|
*
|
|
* if (stack.top() === FAILED) {
|
|
* interpret(ip + 3, ip + 3 + t);
|
|
* } else {
|
|
* interpret(ip + 3 + t, ip + 3 + t + f);
|
|
* }
|
|
*
|
|
* [11] IF_NOT_ERROR t, f
|
|
*
|
|
* if (stack.top() !== FAILED) {
|
|
* interpret(ip + 3, ip + 3 + t);
|
|
* } else {
|
|
* interpret(ip + 3 + t, ip + 3 + t + f);
|
|
* }
|
|
*
|
|
* [12] WHILE_NOT_ERROR b
|
|
*
|
|
* while(stack.top() !== FAILED) {
|
|
* interpret(ip + 2, ip + 2 + b);
|
|
* }
|
|
*
|
|
* Matching
|
|
* --------
|
|
*
|
|
* [13] MATCH_ANY a, f, ...
|
|
*
|
|
* if (input.length > currPos) {
|
|
* interpret(ip + 3, ip + 3 + a);
|
|
* } else {
|
|
* interpret(ip + 3 + a, ip + 3 + a + f);
|
|
* }
|
|
*
|
|
* [14] MATCH_STRING s, a, f, ...
|
|
*
|
|
* if (input.substr(currPos, consts[s].length) === consts[s]) {
|
|
* interpret(ip + 4, ip + 4 + a);
|
|
* } else {
|
|
* interpret(ip + 4 + a, ip + 4 + a + f);
|
|
* }
|
|
*
|
|
* [15] MATCH_STRING_IC s, a, f, ...
|
|
*
|
|
* if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) {
|
|
* interpret(ip + 4, ip + 4 + a);
|
|
* } else {
|
|
* interpret(ip + 4 + a, ip + 4 + a + f);
|
|
* }
|
|
*
|
|
* [16] MATCH_REGEXP r, a, f, ...
|
|
*
|
|
* if (consts[r].test(input.charAt(currPos))) {
|
|
* interpret(ip + 4, ip + 4 + a);
|
|
* } else {
|
|
* interpret(ip + 4 + a, ip + 4 + a + f);
|
|
* }
|
|
*
|
|
* [17] ACCEPT_N n
|
|
*
|
|
* stack.push(input.substring(currPos, n));
|
|
* currPos += n;
|
|
*
|
|
* [18] ACCEPT_STRING s
|
|
*
|
|
* stack.push(consts[s]);
|
|
* currPos += consts[s].length;
|
|
*
|
|
* [19] FAIL e
|
|
*
|
|
* stack.push(FAILED);
|
|
* fail(consts[e]);
|
|
*
|
|
* Calls
|
|
* -----
|
|
*
|
|
* [20] REPORT_SAVED_POS p
|
|
*
|
|
* reportedPos = stack[p];
|
|
*
|
|
* [21] REPORT_CURR_POS
|
|
*
|
|
* reportedPos = currPos;
|
|
*
|
|
* [22] CALL f, n, pc, p1, p2, ..., pN
|
|
*
|
|
* value = consts[f](stack[p1], ..., stack[pN]);
|
|
* stack.pop(n);
|
|
* stack.push(value);
|
|
*
|
|
* Rules
|
|
* -----
|
|
*
|
|
* [23] RULE r
|
|
*
|
|
* stack.push(parseRule(r));
|
|
*
|
|
* Failure Reporting
|
|
* -----------------
|
|
*
|
|
* [24] SILENT_FAILS_ON
|
|
*
|
|
* silentFails++;
|
|
*
|
|
* [25] SILENT_FAILS_OFF
|
|
*
|
|
* silentFails--;
|
|
*/
|
|
function generateBytecode(ast) {
|
|
var consts = [];
|
|
|
|
function addConst(value) {
|
|
var index = arrays.indexOf(consts, value);
|
|
|
|
return index === -1 ? consts.push(value) - 1 : index;
|
|
}
|
|
|
|
function addFunctionConst(params, code) {
|
|
return addConst(
|
|
"function(" + params.join(", ") + ") {" + code + "}"
|
|
);
|
|
}
|
|
|
|
function buildSequence() {
|
|
return Array.prototype.concat.apply([], arguments);
|
|
}
|
|
|
|
function buildCondition(condCode, thenCode, elseCode) {
|
|
return condCode.concat(
|
|
[thenCode.length, elseCode.length],
|
|
thenCode,
|
|
elseCode
|
|
);
|
|
}
|
|
|
|
function buildLoop(condCode, bodyCode) {
|
|
return condCode.concat([bodyCode.length], bodyCode);
|
|
}
|
|
|
|
function buildCall(functionIndex, delta, env, sp) {
|
|
var params = arrays.map(objects.values(env), function(p) { return sp - p; });
|
|
|
|
return [op.CALL, functionIndex, delta, params.length].concat(params);
|
|
}
|
|
|
|
function buildSimplePredicate(expression, negative, context) {
|
|
return buildSequence(
|
|
[op.PUSH_CURR_POS],
|
|
[op.SILENT_FAILS_ON],
|
|
generate(expression, {
|
|
sp: context.sp + 1,
|
|
env: { },
|
|
action: null
|
|
}),
|
|
[op.SILENT_FAILS_OFF],
|
|
buildCondition(
|
|
[negative ? op.IF_ERROR : op.IF_NOT_ERROR],
|
|
buildSequence(
|
|
[op.POP],
|
|
[negative ? op.POP : op.POP_CURR_POS],
|
|
[op.PUSH_UNDEFINED]
|
|
),
|
|
buildSequence(
|
|
[op.POP],
|
|
[negative ? op.POP_CURR_POS : op.POP],
|
|
[op.PUSH_FAILED]
|
|
)
|
|
)
|
|
);
|
|
}
|
|
|
|
function buildSemanticPredicate(code, negative, context) {
|
|
var functionIndex = addFunctionConst(objects.keys(context.env), code);
|
|
|
|
return buildSequence(
|
|
[op.REPORT_CURR_POS],
|
|
buildCall(functionIndex, 0, context.env, context.sp),
|
|
buildCondition(
|
|
[op.IF],
|
|
buildSequence(
|
|
[op.POP],
|
|
negative ? [op.PUSH_FAILED] : [op.PUSH_UNDEFINED]
|
|
),
|
|
buildSequence(
|
|
[op.POP],
|
|
negative ? [op.PUSH_UNDEFINED] : [op.PUSH_FAILED]
|
|
)
|
|
)
|
|
);
|
|
}
|
|
|
|
function buildAppendLoop(expressionCode) {
|
|
return buildLoop(
|
|
[op.WHILE_NOT_ERROR],
|
|
buildSequence([op.APPEND], expressionCode)
|
|
);
|
|
}
|
|
|
|
var generate = visitor.build({
|
|
grammar: function(node) {
|
|
arrays.each(node.rules, generate);
|
|
|
|
node.consts = consts;
|
|
},
|
|
|
|
rule: function(node) {
|
|
node.bytecode = generate(node.expression, {
|
|
sp: -1, // stack pointer
|
|
env: { }, // mapping of label names to stack positions
|
|
action: null // action nodes pass themselves to children here
|
|
});
|
|
},
|
|
|
|
named: function(node, context) {
|
|
var nameIndex = addConst(
|
|
'{ type: "other", description: "' + js.stringEscape(node.name) + '" }'
|
|
);
|
|
|
|
/*
|
|
* The code generated below is slightly suboptimal because |FAIL| pushes
|
|
* to the stack, so we need to stick a |POP| in front of it. We lack a
|
|
* dedicated instruction that would just report the failure and not touch
|
|
* the stack.
|
|
*/
|
|
return buildSequence(
|
|
[op.SILENT_FAILS_ON],
|
|
generate(node.expression, context),
|
|
[op.SILENT_FAILS_OFF],
|
|
buildCondition([op.IF_ERROR], [op.FAIL, nameIndex], [])
|
|
);
|
|
},
|
|
|
|
choice: function(node, context) {
|
|
function buildAlternativesCode(alternatives, context) {
|
|
return buildSequence(
|
|
generate(alternatives[0], {
|
|
sp: context.sp,
|
|
env: { },
|
|
action: null
|
|
}),
|
|
alternatives.length > 1
|
|
? buildCondition(
|
|
[op.IF_ERROR],
|
|
buildSequence(
|
|
[op.POP],
|
|
buildAlternativesCode(alternatives.slice(1), context)
|
|
),
|
|
[]
|
|
)
|
|
: []
|
|
);
|
|
}
|
|
|
|
return buildAlternativesCode(node.alternatives, context);
|
|
},
|
|
|
|
action: function(node, context) {
|
|
var env = { },
|
|
emitCall = node.expression.type !== "sequence"
|
|
|| node.expression.elements.length === 0,
|
|
expressionCode = generate(node.expression, {
|
|
sp: context.sp + (emitCall ? 1 : 0),
|
|
env: env,
|
|
action: node
|
|
}),
|
|
functionIndex = addFunctionConst(objects.keys(env), node.code);
|
|
|
|
return emitCall
|
|
? buildSequence(
|
|
[op.PUSH_CURR_POS],
|
|
expressionCode,
|
|
buildCondition(
|
|
[op.IF_NOT_ERROR],
|
|
buildSequence(
|
|
[op.REPORT_SAVED_POS, 1],
|
|
buildCall(functionIndex, 1, env, context.sp + 2)
|
|
),
|
|
[]
|
|
),
|
|
[op.NIP]
|
|
)
|
|
: expressionCode;
|
|
},
|
|
|
|
sequence: function(node, context) {
|
|
function buildElementsCode(elements, context) {
|
|
var processedCount, functionIndex;
|
|
|
|
if (elements.length > 0) {
|
|
processedCount = node.elements.length - elements.slice(1).length;
|
|
|
|
return buildSequence(
|
|
generate(elements[0], {
|
|
sp: context.sp,
|
|
env: context.env,
|
|
action: null
|
|
}),
|
|
buildCondition(
|
|
[op.IF_NOT_ERROR],
|
|
buildElementsCode(elements.slice(1), {
|
|
sp: context.sp + 1,
|
|
env: context.env,
|
|
action: context.action
|
|
}),
|
|
buildSequence(
|
|
processedCount > 1 ? [op.POP_N, processedCount] : [op.POP],
|
|
[op.POP_CURR_POS],
|
|
[op.PUSH_FAILED]
|
|
)
|
|
)
|
|
);
|
|
} else {
|
|
if (context.action) {
|
|
functionIndex = addFunctionConst(
|
|
objects.keys(context.env),
|
|
context.action.code
|
|
);
|
|
|
|
return buildSequence(
|
|
[op.REPORT_SAVED_POS, node.elements.length],
|
|
buildCall(
|
|
functionIndex,
|
|
node.elements.length,
|
|
context.env,
|
|
context.sp
|
|
),
|
|
[op.NIP]
|
|
);
|
|
} else {
|
|
return buildSequence([op.WRAP, node.elements.length], [op.NIP]);
|
|
}
|
|
}
|
|
}
|
|
|
|
return buildSequence(
|
|
[op.PUSH_CURR_POS],
|
|
buildElementsCode(node.elements, {
|
|
sp: context.sp + 1,
|
|
env: context.env,
|
|
action: context.action
|
|
})
|
|
);
|
|
},
|
|
|
|
labeled: function(node, context) {
|
|
context.env[node.label] = context.sp + 1;
|
|
|
|
return generate(node.expression, {
|
|
sp: context.sp,
|
|
env: { },
|
|
action: null
|
|
});
|
|
},
|
|
|
|
text: function(node, context) {
|
|
return buildSequence(
|
|
[op.PUSH_CURR_POS],
|
|
generate(node.expression, {
|
|
sp: context.sp + 1,
|
|
env: { },
|
|
action: null
|
|
}),
|
|
buildCondition(
|
|
[op.IF_NOT_ERROR],
|
|
buildSequence([op.POP], [op.TEXT]),
|
|
[op.NIP]
|
|
)
|
|
);
|
|
},
|
|
|
|
simple_and: function(node, context) {
|
|
return buildSimplePredicate(node.expression, false, context);
|
|
},
|
|
|
|
simple_not: function(node, context) {
|
|
return buildSimplePredicate(node.expression, true, context);
|
|
},
|
|
|
|
semantic_and: function(node, context) {
|
|
return buildSemanticPredicate(node.code, false, context);
|
|
},
|
|
|
|
semantic_not: function(node, context) {
|
|
return buildSemanticPredicate(node.code, true, context);
|
|
},
|
|
|
|
optional: function(node, context) {
|
|
return buildSequence(
|
|
generate(node.expression, {
|
|
sp: context.sp,
|
|
env: { },
|
|
action: null
|
|
}),
|
|
buildCondition(
|
|
[op.IF_ERROR],
|
|
buildSequence([op.POP], [op.PUSH_NULL]),
|
|
[]
|
|
)
|
|
);
|
|
},
|
|
|
|
zero_or_more: function(node, context) {
|
|
var expressionCode = generate(node.expression, {
|
|
sp: context.sp + 1,
|
|
env: { },
|
|
action: null
|
|
});
|
|
|
|
return buildSequence(
|
|
[op.PUSH_EMPTY_ARRAY],
|
|
expressionCode,
|
|
buildAppendLoop(expressionCode),
|
|
[op.POP]
|
|
);
|
|
},
|
|
|
|
one_or_more: function(node, context) {
|
|
var expressionCode = generate(node.expression, {
|
|
sp: context.sp + 1,
|
|
env: { },
|
|
action: null
|
|
});
|
|
|
|
return buildSequence(
|
|
[op.PUSH_EMPTY_ARRAY],
|
|
expressionCode,
|
|
buildCondition(
|
|
[op.IF_NOT_ERROR],
|
|
buildSequence(buildAppendLoop(expressionCode), [op.POP]),
|
|
buildSequence([op.POP], [op.POP], [op.PUSH_FAILED])
|
|
)
|
|
);
|
|
},
|
|
|
|
rule_ref: function(node) {
|
|
return [op.RULE, asts.indexOfRule(ast, node.name)];
|
|
},
|
|
|
|
literal: function(node) {
|
|
var stringIndex, expectedIndex;
|
|
|
|
if (node.value.length > 0) {
|
|
stringIndex = addConst('"'
|
|
+ js.stringEscape(
|
|
node.ignoreCase ? node.value.toLowerCase() : node.value
|
|
)
|
|
+ '"'
|
|
);
|
|
expectedIndex = addConst([
|
|
'{',
|
|
'type: "literal",',
|
|
'value: "' + js.stringEscape(node.value) + '",',
|
|
'description: "'
|
|
+ js.stringEscape('"' + js.stringEscape(node.value) + '"')
|
|
+ '"',
|
|
'}'
|
|
].join(' '));
|
|
|
|
/*
|
|
* For case-sensitive strings the value must match the beginning of the
|
|
* remaining input exactly. As a result, we can use |ACCEPT_STRING| and
|
|
* save one |substr| call that would be needed if we used |ACCEPT_N|.
|
|
*/
|
|
return buildCondition(
|
|
node.ignoreCase
|
|
? [op.MATCH_STRING_IC, stringIndex]
|
|
: [op.MATCH_STRING, stringIndex],
|
|
node.ignoreCase
|
|
? [op.ACCEPT_N, node.value.length]
|
|
: [op.ACCEPT_STRING, stringIndex],
|
|
[op.FAIL, expectedIndex]
|
|
);
|
|
} else {
|
|
stringIndex = addConst('""');
|
|
|
|
return [op.PUSH, stringIndex];
|
|
}
|
|
},
|
|
|
|
"class": function(node) {
|
|
var regexp, regexpIndex, expectedIndex;
|
|
|
|
if (node.parts.length > 0) {
|
|
regexp = '/^['
|
|
+ (node.inverted ? '^' : '')
|
|
+ arrays.map(node.parts, function(part) {
|
|
return part instanceof Array
|
|
? js.regexpClassEscape(part[0])
|
|
+ '-'
|
|
+ js.regexpClassEscape(part[1])
|
|
: js.regexpClassEscape(part);
|
|
}).join('')
|
|
+ ']/' + (node.ignoreCase ? 'i' : '');
|
|
} else {
|
|
/*
|
|
* IE considers regexps /[]/ and /[^]/ as syntactically invalid, so we
|
|
* translate them into euqivalents it can handle.
|
|
*/
|
|
regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
|
|
}
|
|
|
|
regexpIndex = addConst(regexp);
|
|
expectedIndex = addConst([
|
|
'{',
|
|
'type: "class",',
|
|
'value: "' + js.stringEscape(node.rawText) + '",',
|
|
'description: "' + js.stringEscape(node.rawText) + '"',
|
|
'}'
|
|
].join(' '));
|
|
|
|
return buildCondition(
|
|
[op.MATCH_REGEXP, regexpIndex],
|
|
[op.ACCEPT_N, 1],
|
|
[op.FAIL, expectedIndex]
|
|
);
|
|
},
|
|
|
|
any: function() {
|
|
var expectedIndex = addConst('{ type: "any", description: "any character" }');
|
|
|
|
return buildCondition(
|
|
[op.MATCH_ANY],
|
|
[op.ACCEPT_N, 1],
|
|
[op.FAIL, expectedIndex]
|
|
);
|
|
}
|
|
});
|
|
|
|
generate(ast);
|
|
}
|
|
|
|
module.exports = generateBytecode;
|