"use strict"; var arrays = require("../../utils/arrays"), objects = require("../../utils/objects"), asts = require("../asts"), visitor = require("../visitor"), op = require("../opcodes"), js = require("../js"); /* Generates bytecode. * * Instructions * ============ * * Stack Manipulation * ------------------ * * [0] PUSH c * * stack.push(consts[c]); * * [1] PUSH_UNDEFINED * * stack.push(undefined); * * [2] PUSH_NULL * * stack.push(null); * * [3] PUSH_FAILED * * stack.push(FAILED); * * [4] PUSH_EMPTY_ARRAY * * stack.push([]); * * [5] PUSH_CURR_POS * * stack.push(currPos); * * [6] POP * * stack.pop(); * * [7] POP_CURR_POS * * currPos = stack.pop(); * * [8] POP_N n * * stack.pop(n); * * [9] NIP * * value = stack.pop(); * stack.pop(); * stack.push(value); * * [10] APPEND * * value = stack.pop(); * array = stack.pop(); * array.push(value); * stack.push(array); * * [11] WRAP n * * stack.push(stack.pop(n)); * * [12] TEXT * * stack.push(input.substring(stack.pop(), currPos)); * * Conditions and Loops * -------------------- * * [13] IF t, f * * if (stack.top()) { * interpret(ip + 3, ip + 3 + t); * } else { * interpret(ip + 3 + t, ip + 3 + t + f); * } * * [14] IF_ERROR t, f * * if (stack.top() === FAILED) { * interpret(ip + 3, ip + 3 + t); * } else { * interpret(ip + 3 + t, ip + 3 + t + f); * } * * [15] IF_NOT_ERROR t, f * * if (stack.top() !== FAILED) { * interpret(ip + 3, ip + 3 + t); * } else { * interpret(ip + 3 + t, ip + 3 + t + f); * } * * [16] WHILE_NOT_ERROR b * * while(stack.top() !== FAILED) { * interpret(ip + 2, ip + 2 + b); * } * * Matching * -------- * * [17] MATCH_ANY a, f, ... * * if (input.length > currPos) { * interpret(ip + 3, ip + 3 + a); * } else { * interpret(ip + 3 + a, ip + 3 + a + f); * } * * [18] MATCH_STRING s, a, f, ... * * if (input.substr(currPos, consts[s].length) === consts[s]) { * interpret(ip + 4, ip + 4 + a); * } else { * interpret(ip + 4 + a, ip + 4 + a + f); * } * * [19] MATCH_STRING_IC s, a, f, ... * * if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) { * interpret(ip + 4, ip + 4 + a); * } else { * interpret(ip + 4 + a, ip + 4 + a + f); * } * * [20] MATCH_REGEXP r, a, f, ... * * if (consts[r].test(input.charAt(currPos))) { * interpret(ip + 4, ip + 4 + a); * } else { * interpret(ip + 4 + a, ip + 4 + a + f); * } * * [21] ACCEPT_N n * * stack.push(input.substring(currPos, n)); * currPos += n; * * [22] ACCEPT_STRING s * * stack.push(consts[s]); * currPos += consts[s].length; * * [23] FAIL e * * stack.push(FAILED); * fail(consts[e]); * * Calls * ----- * * [24] LOAD_SAVED_POS p * * savedPos = stack[p]; * * [25] UPDATE_SAVED_POS * * savedPos = currPos; * * [26] CALL f, n, pc, p1, p2, ..., pN * * value = consts[f](stack[p1], ..., stack[pN]); * stack.pop(n); * stack.push(value); * * Rules * ----- * * [27] RULE r * * stack.push(parseRule(r)); * * Failure Reporting * ----------------- * * [28] SILENT_FAILS_ON * * silentFails++; * * [29] SILENT_FAILS_OFF * * silentFails--; */ function generateBytecode(ast) { var consts = []; function addConst(value) { var index = arrays.indexOf(consts, value); return index === -1 ? consts.push(value) - 1 : index; } function addFunctionConst(params, code) { return addConst( "function(" + params.join(", ") + ") {" + code + "}" ); } function buildSequence() { return Array.prototype.concat.apply([], arguments); } function buildCondition(condCode, thenCode, elseCode) { return condCode.concat( [thenCode.length, elseCode.length], thenCode, elseCode ); } function buildLoop(condCode, bodyCode) { return condCode.concat([bodyCode.length], bodyCode); } function buildCall(functionIndex, delta, env, sp) { var params = arrays.map(objects.values(env), function(p) { return sp - p; }); return [op.CALL, functionIndex, delta, params.length].concat(params); } function buildSimplePredicate(expression, negative, context) { return buildSequence( [op.PUSH_CURR_POS], [op.SILENT_FAILS_ON], generate(expression, { sp: context.sp + 1, env: objects.clone(context.env), action: null }), [op.SILENT_FAILS_OFF], buildCondition( [negative ? op.IF_ERROR : op.IF_NOT_ERROR], buildSequence( [op.POP], [negative ? op.POP : op.POP_CURR_POS], [op.PUSH_UNDEFINED] ), buildSequence( [op.POP], [negative ? op.POP_CURR_POS : op.POP], [op.PUSH_FAILED] ) ) ); } function buildSemanticPredicate(code, negative, context) { var functionIndex = addFunctionConst(objects.keys(context.env), code); return buildSequence( [op.UPDATE_SAVED_POS], buildCall(functionIndex, 0, context.env, context.sp), buildCondition( [op.IF], buildSequence( [op.POP], negative ? [op.PUSH_FAILED] : [op.PUSH_UNDEFINED] ), buildSequence( [op.POP], negative ? [op.PUSH_UNDEFINED] : [op.PUSH_FAILED] ) ) ); } function buildAppendLoop(expressionCode) { return buildLoop( [op.WHILE_NOT_ERROR], buildSequence([op.APPEND], expressionCode) ); } var generate = visitor.build({ grammar: function(node) { arrays.each(node.rules, generate); node.consts = consts; }, rule: function(node) { node.bytecode = generate(node.expression, { sp: -1, // stack pointer env: { }, // mapping of label names to stack positions action: null // action nodes pass themselves to children here }); }, named: function(node, context) { var nameIndex = addConst( '{ type: "other", description: "' + js.stringEscape(node.name) + '" }' ); /* * The code generated below is slightly suboptimal because |FAIL| pushes * to the stack, so we need to stick a |POP| in front of it. We lack a * dedicated instruction that would just report the failure and not touch * the stack. */ return buildSequence( [op.SILENT_FAILS_ON], generate(node.expression, context), [op.SILENT_FAILS_OFF], buildCondition([op.IF_ERROR], [op.FAIL, nameIndex], []) ); }, choice: function(node, context) { function buildAlternativesCode(alternatives, context) { return buildSequence( generate(alternatives[0], { sp: context.sp, env: objects.clone(context.env), action: null }), alternatives.length > 1 ? buildCondition( [op.IF_ERROR], buildSequence( [op.POP], buildAlternativesCode(alternatives.slice(1), context) ), [] ) : [] ); } return buildAlternativesCode(node.alternatives, context); }, action: function(node, context) { var env = objects.clone(context.env), emitCall = node.expression.type !== "sequence" || node.expression.elements.length === 0, expressionCode = generate(node.expression, { sp: context.sp + (emitCall ? 1 : 0), env: env, action: node }), functionIndex = addFunctionConst(objects.keys(env), node.code); return emitCall ? buildSequence( [op.PUSH_CURR_POS], expressionCode, buildCondition( [op.IF_NOT_ERROR], buildSequence( [op.LOAD_SAVED_POS, 1], buildCall(functionIndex, 1, env, context.sp + 2) ), [] ), [op.NIP] ) : expressionCode; }, sequence: function(node, context) { function buildElementsCode(elements, context) { var processedCount, functionIndex; if (elements.length > 0) { processedCount = node.elements.length - elements.slice(1).length; return buildSequence( generate(elements[0], { sp: context.sp, env: context.env, action: null }), buildCondition( [op.IF_NOT_ERROR], buildElementsCode(elements.slice(1), { sp: context.sp + 1, env: context.env, action: context.action }), buildSequence( processedCount > 1 ? [op.POP_N, processedCount] : [op.POP], [op.POP_CURR_POS], [op.PUSH_FAILED] ) ) ); } else { if (context.action) { functionIndex = addFunctionConst( objects.keys(context.env), context.action.code ); return buildSequence( [op.LOAD_SAVED_POS, node.elements.length], buildCall( functionIndex, node.elements.length, context.env, context.sp ), [op.NIP] ); } else { return buildSequence([op.WRAP, node.elements.length], [op.NIP]); } } } return buildSequence( [op.PUSH_CURR_POS], buildElementsCode(node.elements, { sp: context.sp + 1, env: context.env, action: context.action }) ); }, labeled: function(node, context) { var env = objects.clone(context.env); context.env[node.label] = context.sp + 1; return generate(node.expression, { sp: context.sp, env: env, action: null }); }, text: function(node, context) { return buildSequence( [op.PUSH_CURR_POS], generate(node.expression, { sp: context.sp + 1, env: objects.clone(context.env), action: null }), buildCondition( [op.IF_NOT_ERROR], buildSequence([op.POP], [op.TEXT]), [op.NIP] ) ); }, simple_and: function(node, context) { return buildSimplePredicate(node.expression, false, context); }, simple_not: function(node, context) { return buildSimplePredicate(node.expression, true, context); }, optional: function(node, context) { return buildSequence( generate(node.expression, { sp: context.sp, env: objects.clone(context.env), action: null }), buildCondition( [op.IF_ERROR], buildSequence([op.POP], [op.PUSH_NULL]), [] ) ); }, zero_or_more: function(node, context) { var expressionCode = generate(node.expression, { sp: context.sp + 1, env: objects.clone(context.env), action: null }); return buildSequence( [op.PUSH_EMPTY_ARRAY], expressionCode, buildAppendLoop(expressionCode), [op.POP] ); }, one_or_more: function(node, context) { var expressionCode = generate(node.expression, { sp: context.sp + 1, env: objects.clone(context.env), action: null }); return buildSequence( [op.PUSH_EMPTY_ARRAY], expressionCode, buildCondition( [op.IF_NOT_ERROR], buildSequence(buildAppendLoop(expressionCode), [op.POP]), buildSequence([op.POP], [op.POP], [op.PUSH_FAILED]) ) ); }, group: function(node, context) { return generate(node.expression, { sp: context.sp, env: objects.clone(context.env), action: null }); }, semantic_and: function(node, context) { return buildSemanticPredicate(node.code, false, context); }, semantic_not: function(node, context) { return buildSemanticPredicate(node.code, true, context); }, rule_ref: function(node) { return [op.RULE, asts.indexOfRule(ast, node.name)]; }, literal: function(node) { var stringIndex, expectedIndex; if (node.value.length > 0) { stringIndex = addConst('"' + js.stringEscape( node.ignoreCase ? node.value.toLowerCase() : node.value ) + '"' ); expectedIndex = addConst([ '{', 'type: "literal",', 'value: "' + js.stringEscape(node.value) + '",', 'description: "' + js.stringEscape(node.rawText) + '"', '}' ].join(' ')); /* * For case-sensitive strings the value must match the beginning of the * remaining input exactly. As a result, we can use |ACCEPT_STRING| and * save one |substr| call that would be needed if we used |ACCEPT_N|. */ return buildCondition( node.ignoreCase ? [op.MATCH_STRING_IC, stringIndex] : [op.MATCH_STRING, stringIndex], node.ignoreCase ? [op.ACCEPT_N, node.value.length] : [op.ACCEPT_STRING, stringIndex], [op.FAIL, expectedIndex] ); } else { stringIndex = addConst('""'); return [op.PUSH, stringIndex]; } }, "class": function(node) { var regexp, regexpIndex, expectedIndex; if (node.parts.length > 0) { regexp = '/^[' + (node.inverted ? '^' : '') + arrays.map(node.parts, function(part) { return part instanceof Array ? js.regexpClassEscape(part[0]) + '-' + js.regexpClassEscape(part[1]) : js.regexpClassEscape(part); }).join('') + ']/' + (node.ignoreCase ? 'i' : ''); } else { /* * IE considers regexps /[]/ and /[^]/ as syntactically invalid, so we * translate them into euqivalents it can handle. */ regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/'; } regexpIndex = addConst(regexp); expectedIndex = addConst([ '{', 'type: "class",', 'value: "' + js.stringEscape(node.rawText) + '",', 'description: "' + js.stringEscape(node.rawText) + '"', '}' ].join(' ')); return buildCondition( [op.MATCH_REGEXP, regexpIndex], [op.ACCEPT_N, 1], [op.FAIL, expectedIndex] ); }, any: function() { var expectedIndex = addConst('{ type: "any", description: "any character" }'); return buildCondition( [op.MATCH_ANY], [op.ACCEPT_N, 1], [op.FAIL, expectedIndex] ); } }); generate(ast); } module.exports = generateBytecode;