pegjs/lib/compiler/passes/generate-bytecode.js
2013-01-06 15:58:48 +01:00

595 lines
15 KiB
JavaScript

var utils = require("../../utils"),
op = require("../opcodes");
/* Generates bytecode.
*
* Instructions
* ============
*
* Stack Manipulation
* ------------------
*
* [0] PUSH c
*
* stack.push(consts[c]);
*
* [1] PUSH_CURR_POS
*
* stack.push(currPos);
*
* [2] POP
*
* stack.pop();
*
* [3] POP_CURR_POS
*
* currPos = stack.pop();
*
* [4] POP_N n
*
* stack.pop(n);
*
* [5] NIP
*
* value = stack.pop();
* stack.pop();
* stack.push(value);
*
* [6] NIP_CURR_POS
*
* value = stack.pop();
* currPos = stack.pop();
* stack.push(value);
*
* [7] APPEND
*
* value = stack.pop();
* array = stack.pop();
* array.push(value);
* stack.push(array);
*
* [8] WRAP n
*
* stack.push(stack.pop(n));
*
* [9] TEXT
*
* stack.pop();
* stack.push(input.substring(stack.top(), currPos));
*
* Conditions and Loops
* --------------------
*
* [10] IF t, f
*
* if (stack.top()) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [11] IF_ERROR t, f
*
* if (stack.top() === null) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [12] IF_NOT_ERROR t, f
*
* if (stack.top() !== null) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [13] WHILE_NOT_ERROR b
*
* while(stack.top() !== null) {
* interpret(ip + 2, ip + 2 + b);
* }
*
* Matching
* --------
*
* [14] MATCH_ANY a, f, ...
*
* if (input.length > currPos) {
* interpret(ip + 3, ip + 3 + a);
* } else {
* interpret(ip + 3 + a, ip + 3 + a + f);
* }
*
* [15] MATCH_STRING s, a, f, ...
*
* if (input.substr(currPos, consts[s].length) === consts[s]) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [16] MATCH_STRING_IC s, a, f, ...
*
* if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [17] MATCH_REGEXP r, a, f, ...
*
* if (consts[r].test(input.charAt(currPos))) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [18] ACCEPT_N n
*
* stack.push(input.substr(currPos, n));
* currPos += n;
*
* [19] ACCEPT_STRING s
*
* stack.push(consts[s]);
* currPos += consts[s].length;
*
* [20] FAIL e
*
* stack.push(null);
* fail(consts[e]);
*
* Calls
* -----
*
* [21] REPORT_SAVED_POS p
*
* reportedPos = stack[p];
*
* [22] REPORT_CURR_POS
*
* reportedPos = currPos;
*
* [23] CALL f, n, pc, p1, p2, ..., pN
*
* value = consts[f](stack[p1], ..., stack[pN]);
* stack.pop(n);
* stack.push(value);
*
* Rules
* -----
*
* [24] RULE r
*
* stack.push(parseRule(r));
*
* Failure Reporting
* -----------------
*
* [25] SILENT_FAILS_ON
*
* silentFails++;
*
* [26] SILENT_FAILS_OFF
*
* silentFails--;
*/
module.exports = function(ast, options) {
var consts = [];
/*
 * Interns |value| in the shared |consts| table and returns its index. A value
 * that is already present (compared with ===) is reused, not added again.
 */
function addConst(value) {
  var existing = utils.indexOf(consts, function(candidate) {
    return candidate === value;
  });

  if (existing !== -1) {
    return existing;
  }

  consts.push(value);
  return consts.length - 1;
}
/*
 * Builds the source text of an anonymous function taking |params| (an array
 * of parameter names) with body |code|, interns it via |addConst| and returns
 * the resulting constant index.
 */
function addFunctionConst(params, code) {
  var source = "function(" + params.join(", ") + ") {" + code + "}";

  return addConst(source);
}
/*
 * Concatenates all arguments, in order, into one new flat array. Array
 * arguments are flattened one level; any other value is appended as-is.
 */
function buildSequence() {
  var code = [],
      i;

  for (i = 0; i < arguments.length; i++) {
    code = code.concat(arguments[i]);
  }

  return code;
}
/*
 * Lays out a conditional instruction: the condition opcode (plus its own
 * arguments), the lengths of the two branches, then the "then" branch
 * followed by the "else" branch.
 */
function buildCondition(condCode, thenCode, elseCode) {
  var branchLengths = [thenCode.length, elseCode.length];

  return condCode
    .concat(branchLengths)
    .concat(thenCode)
    .concat(elseCode);
}
/*
 * Lays out a loop instruction: the loop opcode, the length of the body, then
 * the body itself.
 */
function buildLoop(condCode, bodyCode) {
  var bodyLength = [bodyCode.length];

  return condCode.concat(bodyLength).concat(bodyCode);
}
/*
 * Emits a |CALL| instruction: opcode, function constant index, |delta|, the
 * number of parameters, and then one entry per value stored in |env|, each
 * computed as |sp| minus that stored stack position.
 */
function buildCall(functionIndex, delta, env, sp) {
  var positions = utils.values(env),
      offsets = utils.map(positions, function(position) {
        return sp - position;
      }),
      header = [op.CALL, functionIndex, delta, offsets.length];

  return header.concat(offsets);
}
/*
 * Shared code generator for the |simple_and| and |simple_not| visitors.
 *
 * The current position is saved and |expression| is compiled between
 * |SILENT_FAILS_ON| and |SILENT_FAILS_OFF|. The outcome is then replaced by
 * an empty-string constant or a null constant:
 *
 *   - negative === false: empty string when the expression did not fail,
 *     null otherwise;
 *   - negative === true:  empty string when the expression failed, null
 *     otherwise.
 *
 * The saved position is restored (|POP_CURR_POS|) whenever the expression
 * matched and is simply discarded (|POP|) when it did not.
 */
function buildSimplePredicate(expression, negative, context) {
  var emptyStringIndex = addConst('""'),
      nullIndex = addConst('null'),
      expressionCode = generate(expression, {
        sp: context.sp + 1,
        env: { },
        action: null
      }),
      testCode = negative ? [op.IF_ERROR] : [op.IF_NOT_ERROR],
      thenPosCode = negative ? [op.POP] : [op.POP_CURR_POS],
      elsePosCode = negative ? [op.POP_CURR_POS] : [op.POP],
      thenCode = buildSequence([op.POP], thenPosCode, [op.PUSH, emptyStringIndex]),
      elseCode = buildSequence([op.POP], elsePosCode, [op.PUSH, nullIndex]);

  return buildSequence(
    [op.PUSH_CURR_POS],
    [op.SILENT_FAILS_ON],
    expressionCode,
    [op.SILENT_FAILS_OFF],
    buildCondition(testCode, thenCode, elseCode)
  );
}
/*
 * Shared code generator for the |semantic_and| and |semantic_not| visitors.
 *
 * |code| is wrapped in a function constant whose parameters are the labels
 * currently in |context.env|. The emitted bytecode reports the current
 * position, calls that function and replaces its result by an empty-string
 * constant or a null constant; |negative| swaps which of the two is used for
 * a truthy result.
 */
function buildSemanticPredicate(code, negative, context) {
  var functionIndex = addFunctionConst(utils.keys(context.env), code),
      emptyStringIndex = addConst('""'),
      nullIndex = addConst('null'),
      truthyIndex = negative ? nullIndex : emptyStringIndex,
      falsyIndex = negative ? emptyStringIndex : nullIndex,
      callCode = buildCall(functionIndex, 0, context.env, context.sp),
      resultCode = buildCondition(
        [op.IF],
        [op.POP, op.PUSH, truthyIndex],
        [op.POP, op.PUSH, falsyIndex]
      );

  return buildSequence([op.REPORT_CURR_POS], callCode, resultCode);
}
/*
 * Emits a |WHILE_NOT_ERROR| loop whose body appends the value on top of the
 * stack to the array below it and then runs |expressionCode| again.
 */
function buildAppendLoop(expressionCode) {
  var bodyCode = buildSequence([op.APPEND], expressionCode);

  return buildLoop([op.WHILE_NOT_ERROR], bodyCode);
}
var generate = utils.buildNodeVisitor({
/*
 * Visitor for the grammar node: runs |generate| on every entry of
 * |node.rules| and then stores the shared constant table on the node as
 * |node.consts|.
 */
grammar: function(node) {
utils.each(node.rules, generate);
node.consts = consts;
},
rule: function(node) {
node.bytecode = generate(node.expression, {
sp: -1, // stack pointer
env: { }, // mapping of label names to stack positions
action: null // action nodes pass themselves to children here
});
},
named: function(node, context) {
var nameIndex = addConst(utils.quote(node.name));
/*
* The code generated below is slightly suboptimal because |FAIL| pushes
* to the stack, so we need to stick a |POP| in front of it. We lack a
* dedicated instruction that would just report the failure and not touch
* the stack.
*/
return buildSequence(
[op.SILENT_FAILS_ON],
generate(node.expression, context),
[op.SILENT_FAILS_OFF],
buildCondition([op.IF_ERROR], [op.FAIL, nameIndex], [])
);
},
choice: function(node, context) {
function buildAlternativesCode(alternatives, context) {
return buildSequence(
generate(alternatives[0], {
sp: context.sp,
env: { },
action: null
}),
alternatives.length > 1
? buildCondition(
[op.IF_ERROR],
buildSequence(
[op.POP],
buildAlternativesCode(alternatives.slice(1), context)
),
[]
)
: []
);
}
return buildAlternativesCode(node.alternatives, context);
},
action: function(node, context) {
var env = { },
emitCall = node.expression.type !== "sequence"
|| node.expression.elements.length === 0;
expressionCode = generate(node.expression, {
sp: context.sp + (emitCall ? 1 : 0),
env: env,
action: node
}),
functionIndex = addFunctionConst(utils.keys(env), node.code);
return emitCall
? buildSequence(
[op.PUSH_CURR_POS],
expressionCode,
buildCondition(
[op.IF_NOT_ERROR],
buildSequence(
[op.REPORT_SAVED_POS, 1],
buildCall(functionIndex, 1, env, context.sp + 2)
),
[]
),
buildCondition([op.IF_ERROR], [op.NIP_CURR_POS], [op.NIP])
)
: expressionCode;
},
sequence: function(node, context) {
var emptyArrayIndex, nullIndex;
function buildElementsCode(elements, context) {
var processedCount, functionIndex;
if (elements.length > 0) {
processedCount = node.elements.length - elements.slice(1).length;
return buildSequence(
generate(elements[0], context),
buildCondition(
[op.IF_NOT_ERROR],
buildElementsCode(elements.slice(1), {
sp: context.sp + 1,
env: context.env,
action: context.action
}),
buildSequence(
processedCount > 1 ? [op.POP_N, processedCount] : [op.POP],
[op.POP_CURR_POS],
[op.PUSH, nullIndex]
)
)
);
} else {
if (context.action) {
functionIndex = addFunctionConst(
utils.keys(context.env),
context.action.code
);
return buildSequence(
[op.REPORT_SAVED_POS, node.elements.length],
buildCall(
functionIndex,
node.elements.length,
context.env,
context.sp
),
buildCondition([op.IF_ERROR], [op.NIP_CURR_POS], [op.NIP])
);
} else {
return buildSequence([op.WRAP, node.elements.length], [op.NIP]);
}
}
}
if (node.elements.length > 0) {
nullIndex = addConst('null');
return buildSequence(
[op.PUSH_CURR_POS],
buildElementsCode(node.elements, {
sp: context.sp + 1,
env: context.env,
action: context.action
})
);
} else {
emptyArrayIndex = addConst('[]');
return [op.PUSH, emptyArrayIndex];
}
},
labeled: function(node, context) {
context.env[node.label] = context.sp + 1;
return generate(node.expression, {
sp: context.sp,
env: { },
action: null
});
},
text: function(node, context) {
return buildSequence(
[op.PUSH_CURR_POS],
generate(node.expression, {
sp: context.sp + 1,
env: { },
action: null
}),
buildCondition([op.IF_NOT_ERROR], [op.TEXT], []),
[op.NIP]
);
},
/*
 * Visitor for |simple_and| nodes: delegates to |buildSimplePredicate| with
 * |negative| set to false.
 */
simple_and: function(node, context) {
return buildSimplePredicate(node.expression, false, context);
},
/*
 * Visitor for |simple_not| nodes: delegates to |buildSimplePredicate| with
 * |negative| set to true.
 */
simple_not: function(node, context) {
return buildSimplePredicate(node.expression, true, context);
},
/*
 * Visitor for |semantic_and| nodes: delegates to |buildSemanticPredicate|
 * with the node's code and |negative| set to false.
 */
semantic_and: function(node, context) {
return buildSemanticPredicate(node.code, false, context);
},
/*
 * Visitor for |semantic_not| nodes: delegates to |buildSemanticPredicate|
 * with the node's code and |negative| set to true.
 */
semantic_not: function(node, context) {
return buildSemanticPredicate(node.code, true, context);
},
optional: function(node, context) {
var emptyStringIndex = addConst('""');
return buildSequence(
generate(node.expression, {
sp: context.sp,
env: { },
action: null
}),
buildCondition(
[op.IF_ERROR],
buildSequence([op.POP], [op.PUSH, emptyStringIndex]),
[]
)
);
},
zero_or_more: function(node, context) {
var emptyArrayIndex = addConst('[]');
expressionCode = generate(node.expression, {
sp: context.sp + 1,
env: { },
action: null
});
return buildSequence(
[op.PUSH, emptyArrayIndex],
expressionCode,
buildAppendLoop(expressionCode),
[op.POP]
);
},
one_or_more: function(node, context) {
var emptyArrayIndex = addConst('[]');
nullIndex = addConst('null');
expressionCode = generate(node.expression, {
sp: context.sp + 1,
env: { },
action: null
});
return buildSequence(
[op.PUSH, emptyArrayIndex],
expressionCode,
buildCondition(
[op.IF_NOT_ERROR],
buildSequence(buildAppendLoop(expressionCode), [op.POP]),
buildSequence([op.POP], [op.POP], [op.PUSH, nullIndex])
)
);
},
rule_ref: function(node) {
return [op.RULE, utils.indexOfRuleByName(ast, node.name)];
},
literal: function(node) {
var stringIndex, expectedIndex;
if (node.value.length > 0) {
stringIndex = addConst(node.ignoreCase
? utils.quote(node.value.toLowerCase())
: utils.quote(node.value)
);
expectedIndex = addConst(utils.quote(utils.quote(node.value)));
/*
* For case-sensitive strings the value must match the beginning of the
* remaining input exactly. As a result, we can use |ACCEPT_STRING| and
* save one |substr| call that would be needed if we used |ACCEPT_N|.
*/
return buildCondition(
node.ignoreCase
? [op.MATCH_STRING_IC, stringIndex]
: [op.MATCH_STRING, stringIndex],
node.ignoreCase
? [op.ACCEPT_N, node.value.length]
: [op.ACCEPT_STRING, stringIndex],
[op.FAIL, expectedIndex]
);
} else {
stringIndex = addConst('""');
return [op.PUSH, stringIndex];
}
},
"class": function(node) {
var regexp, regexpIndex, expectedIndex;
if (node.parts.length > 0) {
regexp = '/^['
+ (node.inverted ? '^' : '')
+ utils.map(node.parts, function(part) {
return part instanceof Array
? utils.quoteForRegexpClass(part[0])
+ '-'
+ utils.quoteForRegexpClass(part[1])
: utils.quoteForRegexpClass(part);
}).join('')
+ ']/' + (node.ignoreCase ? 'i' : '');
} else {
/*
* IE considers regexps /[]/ and /[^]/ as syntactically invalid, so we
* translate them into euqivalents it can handle.
*/
regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
}
regexpIndex = addConst(regexp);
expectedIndex = addConst(utils.quote(node.rawText));
return buildCondition(
[op.MATCH_REGEXP, regexpIndex],
[op.ACCEPT_N, 1],
[op.FAIL, expectedIndex]
);
},
any: function(node) {
var expectedIndex = addConst(utils.quote("any character"));
return buildCondition(
[op.MATCH_ANY],
[op.ACCEPT_N, 1],
[op.FAIL, expectedIndex]
);
}
});
generate(ast);
};