Code generator rewrite

This is a complete rewrite of the PEG.js code generator. Its goals are:

  1. Allow optimizing the generated parser code for code size as well as
     for parsing speed.

  2. Prepare ground for future optimizations and big features (like
     incremental parsing).

  3. Replace the old template-based code-generation system with
     something more lightweight and flexible.

  4. General code cleanup (structure, style, variable names, ...).

New Architecture
----------------

The new code generator consists of two steps:

  * Bytecode generator -- produces bytecode for an abstract virtual
    machine

  * JavaScript generator -- produces JavaScript code based on the
    bytecode

The abstract virtual machine is stack-based. Originally I wanted to make
it register-based, but it turned out that all the code related to it
would be more complex and the bytecode itself would be longer (because
of explicit register specifications in instructions). The only downsides
of the stack-based approach seem to be a few small inefficiencies (see
e.g. the |NIP| instruction), which seem to be insignificant.

The new generator allows optimizing for parsing speed or code size (you
can choose using the |optimize| option of the |PEG.buildParser| method
or the --optimize/-o option on the command-line).

When optimizing for size, the JavaScript generator emits the bytecode
together with its constant table and a generic bytecode interpreter.
Because the interpreter is small and the bytecode and constant table
grow only slowly with the size of the grammar, the resulting parser is also
small.

When optimizing for speed, the JavaScript generator just compiles the
bytecode into JavaScript. The generated code is relatively efficient, so
the resulting parser is fast.

Internal Identifiers
--------------------

As a small bonus, all internal identifiers visible to user code in the
initializer, actions and predicates are prefixed by |peg$|. This lowers
the chance that identifiers in user code will conflict with the ones
from PEG.js. It also makes using any internals in user code ugly, which
is a good thing. This solves GH-92.

Performance
-----------

The new code generator improved parsing speed and parser code size
significantly. The generated parsers are now:

  * 39% faster when optimizing for speed

  * 69% smaller when optimizing for size (without minification)

  * 31% smaller when optimizing for size (with minification)

(Parsing speed was measured using the |benchmark/run| script. Code size
was measured by generating parsers for examples in the |examples|
directory and adding up the file sizes. Minification was done by |uglify
--ascii| in version 1.3.4.)

Final Note
----------

This is just a beginning! The new code generator lays a foundation upon
which many optimizations and improvements can (and will) be made.

Stay tuned :-)
redux
David Majda 11 years ago
parent bea6b1fde7
commit fe1ca481ab

@ -8,8 +8,9 @@ PEGJS_VERSION = `cat $(VERSION_FILE)`
MODULES = utils \
grammar-error \
parser \
compiler/passes/allocate-registers \
compiler/passes/generate-code \
compiler/opcodes \
compiler/passes/generate-bytecode \
compiler/passes/generate-javascript \
compiler/passes/remove-proxy-rules \
compiler/passes/report-left-recursion \
compiler/passes/report-missing-rules \

@ -108,6 +108,8 @@ object to `PEG.buildParser`. The following options are supported:
* `output` — if set to `"parser"`, the method will return generated parser
object; if set to `"source"`, it will return parser source code as a string
(default: `"parser"`)
* `optimize` — selects between optimizing the generated parser for parsing
speed (`"speed"`) or code size (`"size"`) (default: `"speed"`)
Using the Parser
----------------

@ -24,11 +24,11 @@ table tr.total td.parse-speed .value { font-size: 175%; }
a, a:visited { color: #3d586c; }
#options {
width: 46em;
margin: 2em auto; border-radius: .5em; -moz-border-radius: .5em; padding: .5em 1em;
text-align: center;
width: 45em;
margin: 2em auto; border-radius: .5em; -moz-border-radius: .5em; padding: .5em 1.5em;
background-color: #f0f0f0;
}
#options #run-count { width: 3em; }
#options #cache { margin-left: 2em; }
#options #run { width: 5em; margin-left: 2em; }
#options label[for=optimize] { margin-left: 2em; }
#options #run { float:right; width: 5em; }

@ -13,6 +13,11 @@
<input type="text" id="run-count" value="10"> times
<input type="checkbox" id="cache">
<label for="cache">Use results cache</label>
<label for="optimize">Optimize:</label>
<select id="optimize">
<option value="speed">Speed</option>
<option value="size">Size</option>
</select>
<input type="button" id="run" value="Run">
</div>

@ -63,7 +63,8 @@ $("#run").click(function() {
}
var options = {
cache: $("#cache").is(":checked"),
cache: $("#cache").is(":checked"),
optimize: $("#optimize").val()
};
Runner.run(benchmarks, runCount, options, {

@ -81,6 +81,8 @@ function printHelp() {
util.puts("Options:");
util.puts(" -n, --run-count <n> number of runs (default: 10)");
util.puts(" --cache make tested parsers cache results");
util.puts(" -o, --optimize <goal> select optimization for speed or size (default:");
util.puts(" speed)");
}
function exitSuccess() {
@ -111,7 +113,10 @@ function nextArg() {
/* Main */
var runCount = 10;
var options = { };
var options = {
cache: false,
optimize: "speed"
};
while (args.length > 0 && isOption(args[0])) {
switch (args[0]) {
@ -131,6 +136,18 @@ while (args.length > 0 && isOption(args[0])) {
options.cache = true;
break;
case "-o":
case "--optimize":
nextArg();
if (args.length === 0) {
abort("Missing parameter of the -o/--optimize option.");
}
if (args[0] !== "speed" && args[0] !== "size") {
abort("Optimization goal must be either \"speed\" or \"size\".");
}
options.optimize = args[0];
break;
case "-h":
case "--help":
printHelp();

@ -29,6 +29,8 @@ function printHelp() {
util.puts(" parser will be allowed to start parsing");
util.puts(" from (default: the first rule in the");
util.puts(" grammar)");
util.puts(" -o, --optimize <goal> select optimization for speed or size (default:");
util.puts(" speed)");
util.puts(" -v, --version print version information and exit");
util.puts(" -h, --help print help and exit");
}
@ -71,8 +73,9 @@ function readStream(inputStream, callback) {
/* This makes the generated parser a CommonJS module by default. */
var exportVar = "module.exports";
var options = {
cache: false,
output: "source"
cache: false,
output: "source",
optimize: "speed"
};
while (args.length > 0 && isOption(args[0])) {
@ -100,6 +103,18 @@ while (args.length > 0 && isOption(args[0])) {
.map(function(s) { return s.trim() });
break;
case "-o":
case "--optimize":
nextArg();
if (args.length === 0) {
abort("Missing parameter of the -o/--optimize option.");
}
if (args[0] !== "speed" && args[0] !== "size") {
abort("Optimization goal must be either \"speed\" or \"size\".");
}
options.optimize = args[0];
break;
case "-v":
case "--version":
printVersion();

@ -11,8 +11,8 @@ module.exports = {
"reportMissingRules",
"reportLeftRecursion",
"removeProxyRules",
"allocateRegisters",
"generateCode"
"generateBytecode",
"generateJavascript"
],
/*

@ -0,0 +1,46 @@
/*
 * Bytecode instruction opcodes.
 *
 * Maps every instruction mnemonic to its numeric opcode (consecutive integers
 * from 0 to 26). The trailing comment on each entry repeats the mnemonic
 * together with the names of the operands the instruction takes.
 */
module.exports = {
/* Stack Manipulation */
PUSH: 0, // PUSH c
PUSH_CURR_POS: 1, // PUSH_CURR_POS
POP: 2, // POP
POP_CURR_POS: 3, // POP_CURR_POS
POP_N: 4, // POP_N n
NIP: 5, // NIP
NIP_CURR_POS: 6, // NIP_CURR_POS
APPEND: 7, // APPEND
WRAP: 8, // WRAP n
TEXT: 9, // TEXT
/* Conditions and Loops */
IF: 10, // IF t, f
IF_ERROR: 11, // IF_ERROR t, f
IF_NOT_ERROR: 12, // IF_NOT_ERROR t, f
WHILE_NOT_ERROR: 13, // WHILE_NOT_ERROR b
/* Matching */
MATCH_ANY: 14, // MATCH_ANY a, f, ...
MATCH_STRING: 15, // MATCH_STRING s, a, f, ...
MATCH_STRING_IC: 16, // MATCH_STRING_IC s, a, f, ...
MATCH_REGEXP: 17, // MATCH_REGEXP r, a, f, ...
ACCEPT_N: 18, // ACCEPT_N n
ACCEPT_STRING: 19, // ACCEPT_STRING s
FAIL: 20, // FAIL e
/* Calls */
REPORT_SAVED_POS: 21, // REPORT_SAVED_POS p
REPORT_CURR_POS: 22, // REPORT_CURR_POS
CALL: 23, // CALL f, n, pc, p1, p2, ..., pN
/* Rules */
RULE: 24, // RULE r
/* Failure Reporting */
SILENT_FAILS_ON: 25, // SILENT_FAILS_ON
SILENT_FAILS_OFF: 26 // SILENT_FAILS_OFF
};

@ -9,6 +9,6 @@ module.exports = {
reportMissingRules: require("./passes/report-missing-rules"),
reportLeftRecursion: require("./passes/report-left-recursion"),
removeProxyRules: require("./passes/remove-proxy-rules"),
allocateRegisters: require("./passes/allocate-registers"),
generateCode: require("./passes/generate-code")
generateBytecode: require("./passes/generate-bytecode"),
generateJavascript: require("./passes/generate-javascript")
};

@ -1,230 +0,0 @@
var utils = require("../../utils");
/*
* Allocates registers that the generated code for each node will use to store
* match results and parse positions. For "action", "semantic_and" and
* "semantic_not" nodes it also computes visibility of labels at the point of
* action/predicate code execution and a mapping from label names to registers
* that will contain the labeled values.
*
* The following will hold after running this pass:
*
* * All nodes except "grammar" and "rule" nodes will have a |resultIndex|
* property. It will contain an index of a register that will store a match
* result of the expression represented by the node in generated code.
*
* * Some nodes will have a |posIndex| property. It will contain an index of a
* register that will store a saved parse position in generated code.
*
* * All "rule" nodes will contain a |registerCount| property. It will contain
* the number of registers that will be used by code generated for the
* rule's expression.
*
* * All "action", "semantic_and" and "semantic_not" nodes will have a |params|
* property. It will contain a mapping from names of labels visible at the
* point of action/predicate code execution to registers that will contain
* the labeled values.
*/
module.exports = function(ast) {
/*
 * Register allocator that hands out registers from an unlimited
 * integer-indexed pool. Registers can be allocated and released in any
 * order, and each one carries a reference count (this simplifies tracking
 * active registers when they store values passed to action/predicate code).
 * Allocation always picks the free register with the lowest index.
 */
var registers = (function() {
  // counts[i] is the reference count of register i; an entry exists for
  // every register that was allocated at least once
  var counts = [];

  /* Returns the index of the lowest register whose count is 0, or -1. */
  function firstFree() {
    var idx;

    for (idx = 0; idx < counts.length; idx++) {
      if (counts[idx] === 0) {
        return idx;
      }
    }

    return -1;
  }

  return {
    /* Claims a register (reference count 1) and returns its index. */
    alloc: function() {
      var idx = firstFree();

      if (idx === -1) {
        counts.push(1);
        return counts.length - 1;
      }

      counts[idx] = 1;
      return idx;
    },

    /* Adds one reference to the register at |index|. */
    use: function(index) {
      counts[index] += 1;
    },

    /* Drops one reference from the register at |index|. */
    release: function(index) {
      counts[index] -= 1;
    },

    /* Highest index ever allocated since the last reset (-1 if none). */
    maxIndex: function() {
      return counts.length - 1;
    },

    /* Forgets all registers. */
    reset: function() {
      counts = [];
    }
  };
})();
/*
 * Tracks which register holds the value of each label for as long as the
 * label is in scope. Scopes nest; only the innermost scope is written to by
 * |add| and read by |buildParams|.
 */
var vars = (function(registers) {
  var scopes = [];   // stack of nested scopes, innermost last

  function innermost() {
    return scopes[scopes.length - 1];
  }

  return {
    /* Opens a new, empty scope. */
    beginScope: function() {
      scopes.push({});
    },

    /* Closes the innermost scope and releases every register it held. */
    endScope: function() {
      var closed = scopes.pop(), label;

      for (label in closed) {
        registers.release(closed[label]);
      }
    },

    /* Binds |name| to register |index| and takes a reference to it. */
    add: function(name, index) {
      innermost()[name] = index;
      registers.use(index);
    },

    /* Returns a shallow copy of the innermost scope's name -> register map. */
    buildParams: function() {
      var source = innermost(), copy = {}, label;

      for (label in source) {
        copy[label] = source[label];
      }

      return copy;
    }
  };
})(registers);
/* Holds a register in |node.posIndex| for the duration of |f|. */
function savePos(node, f) {
  var index = registers.alloc();

  node.posIndex = index;
  f();
  registers.release(node.posIndex);
}

/* Makes |subnode| write its result into the register |node| already uses. */
function reuseResult(node, subnode) {
  var shared = node.resultIndex;

  subnode.resultIndex = shared;
}

/* Holds a register in |node.resultIndex| for the duration of |f|. */
function allocResult(node, f) {
  var index = registers.alloc();

  node.resultIndex = index;
  f();
  registers.release(node.resultIndex);
}

/* Runs |f| inside a fresh label scope. */
function scoped(f) {
  vars.beginScope();
  f();
  vars.endScope();
}

/* Intentionally empty visitor. */
function nop() {
}

/* Visits |node.expression| inside a fresh label scope. */
function computeExpressionScoped(node) {
  vars.beginScope();
  compute(node.expression);
  vars.endScope();
}

/* Like |computeExpressionScoped|, but the child shares the node's result register. */
function computeExpressionScopedReuseResult(node) {
  node.expression.resultIndex = node.resultIndex;
  computeExpressionScoped(node);
}

/* Like |computeExpressionScoped|, but the child gets a result register of its own. */
function computeExpressionScopedAllocResult(node) {
  var child = node.expression;

  allocResult(child, function() {
    computeExpressionScoped(node);
  });
}

/* Like |computeExpressionScopedReuseResult|, with a position register held on |node|. */
function computeExpressionScopedReuseResultSavePos(node) {
  savePos(node, function() {
    computeExpressionScopedReuseResult(node);
  });
}

/* Stores the labels of the innermost scope in |node.params|. */
function computeParams(node) {
  var visible = vars.buildParams();

  node.params = visible;
}
/*
 * Visitor that walks the AST and assigns |resultIndex|, |posIndex|,
 * |registerCount| and |params| properties to the nodes it visits.
 */
var compute = utils.buildNodeVisitor({
  grammar: function(node) {
    utils.each(node.rules, compute);
  },

  /* Every rule starts with an empty register pool. */
  rule: function(node) {
    registers.reset();
    computeExpressionScopedAllocResult(node);
    node.registerCount = registers.maxIndex() + 1;
  },

  named: function(node) {
    var inner = node.expression;

    reuseResult(node, inner);
    compute(inner);
  },

  /* All alternatives write to the choice's register, each in its own scope. */
  choice: function(node) {
    utils.each(node.alternatives, function(alt) {
      reuseResult(node, alt);
      scoped(function() {
        compute(alt);
      });
    });
  },

  /* Parameters are collected after the expression introduced its labels. */
  action: function(node) {
    savePos(node, function() {
      reuseResult(node, node.expression);
      scoped(function() {
        compute(node.expression);
        computeParams(node);
      });
    });
  },

  /*
   * All element registers stay allocated until the whole sequence has been
   * processed; only then are they released.
   */
  sequence: function(node) {
    savePos(node, function() {
      var items = node.elements;

      utils.each(items, function(item) {
        item.resultIndex = registers.alloc();
        compute(item);
      });

      utils.each(items, function(item) {
        registers.release(item.resultIndex);
      });
    });
  },

  labeled: function(node) {
    vars.add(node.label, node.resultIndex);
    computeExpressionScopedReuseResult(node);
  },

  text:         computeExpressionScopedReuseResultSavePos,
  simple_and:   computeExpressionScopedReuseResultSavePos,
  simple_not:   computeExpressionScopedReuseResultSavePos,
  semantic_and: computeParams,
  semantic_not: computeParams,
  optional:     computeExpressionScopedReuseResult,
  zero_or_more: computeExpressionScopedAllocResult,
  one_or_more:  computeExpressionScopedAllocResult,
  rule_ref:     nop,
  literal:      nop,
  "class":      nop,
  any:          nop
});

compute(ast);
};

@ -0,0 +1,594 @@
var utils = require("../../utils"),
op = require("../opcodes");
/* Generates bytecode.
*
* Instructions
* ============
*
* Stack Manipulation
* ------------------
*
* [0] PUSH c
*
* stack.push(consts[c]);
*
* [1] PUSH_CURR_POS
*
* stack.push(currPos);
*
* [2] POP
*
* stack.pop();
*
* [3] POP_CURR_POS
*
* currPos = stack.pop();
*
* [4] POP_N n
*
* stack.pop(n);
*
* [5] NIP
*
* value = stack.pop();
* stack.pop();
* stack.push(value);
*
* [6] NIP_CURR_POS
*
* value = stack.pop();
* currPos = stack.pop();
* stack.push(value);
*
* [7] APPEND
*
* value = stack.pop();
* array = stack.pop();
* array.push(value);
* stack.push(array);
*
* [8] WRAP n
*
* stack.push(stack.pop(n));
*
* [9] TEXT
*
* stack.pop();
* stack.push(input.substring(stack.top(), currPos));
*
* Conditions and Loops
* --------------------
*
* [10] IF t, f
*
* if (stack.top()) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [11] IF_ERROR t, f
*
* if (stack.top() === null) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [12] IF_NOT_ERROR t, f
*
* if (stack.top() !== null) {
* interpret(ip + 3, ip + 3 + t);
* } else {
* interpret(ip + 3 + t, ip + 3 + t + f);
* }
*
* [13] WHILE_NOT_ERROR b
*
* while(stack.top() !== null) {
* interpret(ip + 2, ip + 2 + b);
* }
*
* Matching
* --------
*
* [14] MATCH_ANY a, f, ...
*
* if (input.length > currPos) {
* interpret(ip + 3, ip + 3 + a);
* } else {
* interpret(ip + 3 + a, ip + 3 + a + f);
* }
*
* [15] MATCH_STRING s, a, f, ...
*
* if (input.substr(currPos, consts[s].length) === consts[s]) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [16] MATCH_STRING_IC s, a, f, ...
*
* if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [17] MATCH_REGEXP r, a, f, ...
*
* if (consts[r].test(input.charAt(currPos))) {
* interpret(ip + 4, ip + 4 + a);
* } else {
* interpret(ip + 4 + a, ip + 4 + a + f);
* }
*
* [18] ACCEPT_N n
*
* stack.push(input.substr(currPos, n));
* currPos += n;
*
* [19] ACCEPT_STRING s
*
* stack.push(consts[s]);
* currPos += consts[s].length;
*
* [20] FAIL e
*
* stack.push(null);
* fail(consts[e]);
*
* Calls
* -----
*
* [21] REPORT_SAVED_POS p
*
* reportedPos = stack[p];
*
* [22] REPORT_CURR_POS
*
* reportedPos = currPos;
*
* [23] CALL f, n, pc, p1, p2, ..., pN
*
* value = consts[f](stack[p1], ..., stack[pN]);
* stack.pop(n);
* stack.push(value);
*
* Rules
* -----
*
* [24] RULE r
*
* stack.push(parseRule(r));
*
* Failure Reporting
* -----------------
*
* [25] SILENT_FAILS_ON
*
* silentFails++;
*
* [26] SILENT_FAILS_OFF
*
* silentFails--;
*/
module.exports = function(ast, options) {
/* Constant table shared by all rules; entries are referenced by index. */
var consts = [];

/*
 * Returns the index of |value| in the constant table, appending it first if
 * no strictly equal entry exists yet.
 */
function addConst(value) {
  var found = utils.indexOf(consts, function(existing) {
    return existing === value;
  });

  if (found !== -1) {
    return found;
  }

  consts.push(value);
  return consts.length - 1;
}
/*
 * Registers the source text of an anonymous function with the given
 * parameter names and body as a constant and returns its index.
 */
function addFunctionConst(params, code) {
  var source = "function(" + params.join(", ") + ") {" + code + "}";

  return addConst(source);
}
/* Concatenates all bytecode fragments passed as arguments into one array. */
function buildSequence() {
  var code = [], i;

  for (i = 0; i < arguments.length; i++) {
    code = code.concat(arguments[i]);
  }

  return code;
}
/*
 * Builds a conditional: the condition code, followed by the lengths of the
 * "then" and "else" branches, followed by the two branches themselves.
 */
function buildCondition(condCode, thenCode, elseCode) {
  var branchLengths = [thenCode.length, elseCode.length];

  return condCode.concat(branchLengths, thenCode, elseCode);
}
/* Builds a loop: the condition code, the body length, then the body. */
function buildLoop(condCode, bodyCode) {
  var header = condCode.concat([bodyCode.length]);

  return header.concat(bodyCode);
}
/*
 * Builds a |CALL| instruction. Each stack position stored in |env| is turned
 * into an offset relative to |sp| and passed as a call parameter.
 */
function buildCall(functionIndex, delta, env, sp) {
  var positions = utils.values(env),
      offsets   = utils.map(positions, function(position) {
        return sp - position;
      }),
      header    = [op.CALL, functionIndex, delta, offsets.length];

  return header.concat(offsets);
}
/*
 * Builds bytecode for the "simple_and" (|negative| false) and "simple_not"
 * (|negative| true) predicates. The expression is matched with failure
 * reporting silenced; the result is then replaced by an empty string or by
 * |null|, and in the |POP_CURR_POS| branches the saved position is restored.
 */
function buildSimplePredicate(expression, negative, context) {
  var emptyStringIndex = addConst('""'),
      nullIndex        = addConst('null'),
      expressionCode   = generate(expression, {
        sp:     context.sp + 1,
        env:    { },
        action: null
      }),
      conditionCode, successCode, failureCode;

  conditionCode = [negative ? op.IF_ERROR : op.IF_NOT_ERROR];

  successCode = buildSequence(
    [op.POP],
    [negative ? op.POP : op.POP_CURR_POS],
    [op.PUSH, emptyStringIndex]
  );

  failureCode = buildSequence(
    [op.POP],
    [negative ? op.POP_CURR_POS : op.POP],
    [op.PUSH, nullIndex]
  );

  return buildSequence(
    [op.PUSH_CURR_POS],
    [op.SILENT_FAILS_ON],
    expressionCode,
    [op.SILENT_FAILS_OFF],
    buildCondition(conditionCode, successCode, failureCode)
  );
}
/*
 * Builds bytecode for the "semantic_and" (|negative| false) and
 * "semantic_not" (|negative| true) predicates: the user code is called with
 * the labels from |context.env| and its result is replaced by an empty
 * string or by |null|.
 */
function buildSemanticPredicate(code, negative, context) {
  var functionIndex    = addFunctionConst(utils.keys(context.env), code),
      emptyStringIndex = addConst('""'),
      nullIndex        = addConst('null'),
      truthyIndex      = negative ? nullIndex : emptyStringIndex,
      falsyIndex       = negative ? emptyStringIndex : nullIndex,
      callCode         = buildCall(functionIndex, 0, context.env, context.sp);

  return buildSequence(
    [op.REPORT_CURR_POS],
    callCode,
    buildCondition(
      [op.IF],
      buildSequence([op.POP], [op.PUSH, truthyIndex]),
      buildSequence([op.POP], [op.PUSH, falsyIndex])
    )
  );
}
/*
 * Builds a |WHILE_NOT_ERROR| loop whose body is an |APPEND| followed by
 * |expressionCode|.
 */
function buildAppendLoop(expressionCode) {
  var bodyCode = buildSequence([op.APPEND], expressionCode);

  return buildLoop([op.WHILE_NOT_ERROR], bodyCode);
}
/*
 * Node visitor that emits bytecode for every AST node type.
 *
 * Expression visitors receive a |context| object with three fields:
 *
 *   * |sp|     -- stack pointer (the "rule" visitor starts it at -1)
 *   * |env|    -- mapping of label names to stack positions
 *   * |action| -- the enclosing "action" node, or |null|
 *
 * and return the generated bytecode as a flat array. The "rule" visitor
 * stores that array in |node.bytecode|; the "grammar" visitor stores the
 * shared constant table in |node.consts|.
 *
 * All temporaries are declared with |var|. Earlier revisions of the "action",
 * "zero_or_more" and "one_or_more" visitors assigned |expressionCode|,
 * |functionIndex| and |nullIndex| without declaring them, which created
 * implicit globals (and throws in strict mode).
 */
var generate = utils.buildNodeVisitor({
  grammar: function(node) {
    utils.each(node.rules, generate);

    node.consts = consts;
  },

  rule: function(node) {
    node.bytecode = generate(node.expression, {
      sp:     -1,   // stack pointer
      env:    { },  // mapping of label names to stack positions
      action: null  // action nodes pass themselves to children here
    });
  },

  named: function(node, context) {
    var nameIndex = addConst(utils.quote(node.name));

    /*
     * The code generated below is slightly suboptimal because |FAIL| pushes
     * to the stack, so we need to stick a |POP| in front of it. We lack a
     * dedicated instruction that would just report the failure and not touch
     * the stack.
     *
     * NOTE(review): the paragraph above mentions a |POP| in front of |FAIL|,
     * but no |POP| is emitted below -- confirm which of the two is intended.
     */
    return buildSequence(
      [op.SILENT_FAILS_ON],
      generate(node.expression, context),
      [op.SILENT_FAILS_OFF],
      buildCondition([op.IF_ERROR], [op.FAIL, nameIndex], [])
    );
  },

  choice: function(node, context) {
    /*
     * Emits the first alternative and, when more remain, falls back to the
     * rest of them (recursively) if the first one produced an error.
     */
    function buildAlternativesCode(alternatives, context) {
      return buildSequence(
        generate(alternatives[0], {
          sp:     context.sp,
          env:    { },
          action: null
        }),
        alternatives.length > 1
          ? buildCondition(
              [op.IF_ERROR],
              buildSequence(
                [op.POP],
                buildAlternativesCode(alternatives.slice(1), context)
              ),
              []
            )
          : []
      );
    }

    return buildAlternativesCode(node.alternatives, context);
  },

  action: function(node, context) {
    /*
     * For a non-empty sequence the call is emitted by the "sequence" visitor
     * itself (it receives this node via |context.action|); in every other
     * case it is emitted here.
     *
     * |env| is filled in by "labeled" nodes while the expression is being
     * generated, so |functionIndex| must be computed after |expressionCode|.
     */
    var env            = { },
        emitCall       = node.expression.type !== "sequence"
                      || node.expression.elements.length === 0,
        expressionCode = generate(node.expression, {
          sp:     context.sp + (emitCall ? 1 : 0),
          env:    env,
          action: node
        }),
        functionIndex  = addFunctionConst(utils.keys(env), node.code);

    return emitCall
      ? buildSequence(
          [op.PUSH_CURR_POS],
          expressionCode,
          buildCondition(
            [op.IF_NOT_ERROR],
            buildSequence(
              [op.REPORT_SAVED_POS, 1],
              buildCall(functionIndex, 1, env, context.sp + 2)
            ),
            []
          ),
          buildCondition([op.IF_ERROR], [op.NIP_CURR_POS], [op.NIP])
        )
      : expressionCode;
  },

  sequence: function(node, context) {
    var emptyArrayIndex, nullIndex;

    /*
     * Emits code for the remaining |elements|. When none remain, the
     * sequence has matched: either the enclosing action is called or the
     * element results are wrapped into an array.
     */
    function buildElementsCode(elements, context) {
      var processedCount, functionIndex;

      if (elements.length > 0) {
        processedCount = node.elements.length - elements.slice(1).length;

        return buildSequence(
          generate(elements[0], context),
          buildCondition(
            [op.IF_NOT_ERROR],
            buildElementsCode(elements.slice(1), {
              sp:     context.sp + 1,
              env:    context.env,
              action: context.action
            }),
            buildSequence(
              processedCount > 1 ? [op.POP_N, processedCount] : [op.POP],
              [op.POP_CURR_POS],
              [op.PUSH, nullIndex]
            )
          )
        );
      } else {
        if (context.action) {
          functionIndex = addFunctionConst(
            utils.keys(context.env),
            context.action.code
          );

          return buildSequence(
            [op.REPORT_SAVED_POS, node.elements.length],
            buildCall(
              functionIndex,
              node.elements.length,
              context.env,
              context.sp
            ),
            buildCondition([op.IF_ERROR], [op.NIP_CURR_POS], [op.NIP])
          );
        } else {
          return buildSequence([op.WRAP, node.elements.length], [op.NIP]);
        }
      }
    }

    if (node.elements.length > 0) {
      nullIndex = addConst('null');

      return buildSequence(
        [op.PUSH_CURR_POS],
        buildElementsCode(node.elements, {
          sp:     context.sp + 1,
          env:    context.env,
          action: context.action
        })
      );
    } else {
      emptyArrayIndex = addConst('[]');

      return [op.PUSH, emptyArrayIndex];
    }
  },

  labeled: function(node, context) {
    /* Record where the labeled value will sit on the stack. */
    context.env[node.label] = context.sp + 1;

    return generate(node.expression, {
      sp:     context.sp,
      env:    { },
      action: null
    });
  },

  text: function(node, context) {
    return buildSequence(
      [op.PUSH_CURR_POS],
      generate(node.expression, {
        sp:     context.sp + 1,
        env:    { },
        action: null
      }),
      buildCondition([op.IF_NOT_ERROR], [op.TEXT], []),
      [op.NIP]
    );
  },

  simple_and: function(node, context) {
    return buildSimplePredicate(node.expression, false, context);
  },

  simple_not: function(node, context) {
    return buildSimplePredicate(node.expression, true, context);
  },

  semantic_and: function(node, context) {
    return buildSemanticPredicate(node.code, false, context);
  },

  semantic_not: function(node, context) {
    return buildSemanticPredicate(node.code, true, context);
  },

  optional: function(node, context) {
    var emptyStringIndex = addConst('""');

    return buildSequence(
      generate(node.expression, {
        sp:     context.sp,
        env:    { },
        action: null
      }),
      buildCondition(
        [op.IF_ERROR],
        buildSequence([op.POP], [op.PUSH, emptyStringIndex]),
        []
      )
    );
  },

  zero_or_more: function(node, context) {
    var emptyArrayIndex = addConst('[]'),
        expressionCode  = generate(node.expression, {
          sp:     context.sp + 1,
          env:    { },
          action: null
        });

    return buildSequence(
      [op.PUSH, emptyArrayIndex],
      expressionCode,
      buildAppendLoop(expressionCode),
      [op.POP]
    );
  },

  one_or_more: function(node, context) {
    var emptyArrayIndex = addConst('[]'),
        nullIndex       = addConst('null'),
        expressionCode  = generate(node.expression, {
          sp:     context.sp + 1,
          env:    { },
          action: null
        });

    return buildSequence(
      [op.PUSH, emptyArrayIndex],
      expressionCode,
      buildCondition(
        [op.IF_NOT_ERROR],
        buildSequence(buildAppendLoop(expressionCode), [op.POP]),
        buildSequence([op.POP], [op.POP], [op.PUSH, nullIndex])
      )
    );
  },

  rule_ref: function(node) {
    return [op.RULE, utils.indexOfRuleByName(ast, node.name)];
  },

  literal: function(node) {
    var stringIndex, expectedIndex;

    if (node.value.length > 0) {
      stringIndex = addConst(node.ignoreCase
        ? utils.quote(node.value.toLowerCase())
        : utils.quote(node.value)
      );
      expectedIndex = addConst(utils.quote(utils.quote(node.value)));

      /*
       * For case-sensitive strings the value must match the beginning of the
       * remaining input exactly. As a result, we can use |ACCEPT_STRING| and
       * save one |substr| call that would be needed if we used |ACCEPT_N|.
       */
      return buildCondition(
        node.ignoreCase
          ? [op.MATCH_STRING_IC, stringIndex]
          : [op.MATCH_STRING, stringIndex],
        node.ignoreCase
          ? [op.ACCEPT_N, node.value.length]
          : [op.ACCEPT_STRING, stringIndex],
        [op.FAIL, expectedIndex]
      );
    } else {
      stringIndex = addConst('""');

      return [op.PUSH, stringIndex];
    }
  },

  "class": function(node) {
    var regexp, regexpIndex, expectedIndex;

    if (node.parts.length > 0) {
      regexp = '/^['
        + (node.inverted ? '^' : '')
        + utils.map(node.parts, function(part) {
            return part instanceof Array
              ? utils.quoteForRegexpClass(part[0])
                + '-'
                + utils.quoteForRegexpClass(part[1])
              : utils.quoteForRegexpClass(part);
          }).join('')
        + ']/' + (node.ignoreCase ? 'i' : '');
    } else {
      /*
       * IE considers regexps /[]/ and /[^]/ as syntactically invalid, so we
       * translate them into equivalents it can handle.
       */
      regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
    }

    regexpIndex   = addConst(regexp);
    expectedIndex = addConst(utils.quote(node.rawText));

    return buildCondition(
      [op.MATCH_REGEXP, regexpIndex],
      [op.ACCEPT_N, 1],
      [op.FAIL, expectedIndex]
    );
  },

  any: function(node) {
    var expectedIndex = addConst(utils.quote("any character"));

    return buildCondition(
      [op.MATCH_ANY],
      [op.ACCEPT_N, 1],
      [op.FAIL, expectedIndex]
    );
  }
});

generate(ast);
};

@ -1,867 +0,0 @@
var utils = require("../../utils");
/* Generates the parser code. */
module.exports = function(ast, options) {
options = utils.clone(options);
utils.defaults(options, {
cache: false,
allowedStartRules: [ast.startRule]
});
/*
* Codie 1.1.0
*
* https://github.com/dmajda/codie
*
* Copyright (c) 2011-2012 David Majda
* Licensend under the MIT license.
*/
var Codie = (function(undefined) {
/*
 * Escapes |s| so that it can be embedded between double quotes in generated
 * JavaScript source. Returns the escaped text without the surrounding
 * quotes.
 */
function stringEscape(s) {
  function hex(ch) { return ch.charCodeAt(0).toString(16).toUpperCase(); }

  /*
   * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
   * string literal except for the closing quote character, backslash,
   * carriage return, line separator, paragraph separator, and line feed.
   * Any character may appear in the form of an escape sequence.
   *
   * For portability, we also escape all control and non-ASCII characters.
   * Note that "\0" and "\v" escape sequences are not used because JSHint
   * does not like the first and IE the second.
   *
   * The two |\u| ranges are split by the number of hex digits |hex| returns
   * (three for U+0100..U+0FFF, four for U+1000..U+FFFF) and must be
   * contiguous with the |\x| range above them; previously U+0100..U+017F and
   * U+1000..U+107F fell through unescaped.
   */
  return s
    .replace(/\\/g, '\\\\')   // backslash
    .replace(/"/g, '\\"')     // closing double quote
    .replace(/\x08/g, '\\b')  // backspace
    .replace(/\t/g, '\\t')    // horizontal tab
    .replace(/\n/g, '\\n')    // line feed
    .replace(/\f/g, '\\f')    // form feed
    .replace(/\r/g, '\\r')    // carriage return
    .replace(/[\x00-\x07\x0B\x0E\x0F]/g, function(ch) { return '\\x0' + hex(ch); })
    .replace(/[\x10-\x1F\x80-\xFF]/g, function(ch) { return '\\x' + hex(ch); })
    .replace(/[\u0100-\u0FFF]/g, function(ch) { return '\\u0' + hex(ch); })
    .replace(/[\u1000-\uFFFF]/g, function(ch) { return '\\u' + hex(ch); });
}
/* Returns a statement that appends the expression |s| to the output buffer. */
function push(s) {
  var call = '__p.push(' + s;

  return call + ');';
}

/*
 * Returns a statement that appends the first |length| characters of
 * |template| to the output buffer as a string literal, or an empty string
 * when there is nothing to append. The text is unindented by the current
 * indentation level first.
 */
function pushRaw(template, length, state) {
  /*
   * Strips |level| leading characters from every line of |code|. The very
   * first line is stripped only when |unindentFirst| is set.
   */
  function unindent(code, level, unindentFirst) {
    var leading = new RegExp('^.{' + level +'}', "gm");

    return code.replace(leading, function(str, offset) {
      return (offset === 0 && !unindentFirst) ? str : "";
    });
  }

  var raw     = template.substring(0, length),
      level   = state.indentLevel(),
      atBOL   = state.atBOL,
      escaped = stringEscape(unindent(raw, level, atBOL));

  if (escaped.length > 0) {
    return push('"' + escaped + '"');
  }

  return '';
}
/*
 * The Codie template engine: version, configuration, the table of
 * #-commands and the |template| compiler.
 */
var Codie = {
/* Codie version (uses semantic versioning). */
VERSION: "1.1.0",
/*
 * Specifies by how many characters #if/#else and #for unindent their
 * content in the generated code.
 */
indentStep: 2,
/*
 * Description of #-commands. Extend to define your own commands.
 *
 * Each command has a |params| regexp its parameter string must match, a
 * |compile| function returning a pair [generated code, variables used], and
 * a |stackOp| naming how the command stack is updated. Optional |init| and
 * |exit| hooks are called by |template| and by the "end" command.
 */
commands: {
"if": {
params: /^(.*)$/,
compile: function(state, prefix, params) {
return ['if(' + params[0] + '){', []];
},
stackOp: "push"
},
"else": {
params: /^$/,
compile: function(state) {
var stack = state.commandStack,
insideElse = stack[stack.length - 1] === "else",
insideIf = stack[stack.length - 1] === "if";
if (insideElse) { throw new Error("Multiple #elses."); }
if (!insideIf) { throw new Error("Using #else outside of #if."); }
return ['}else{', []];
},
stackOp: "replace"
},
"for": {
params: /^([a-zA-Z_][a-zA-Z0-9_]*)[ \t]+in[ \t]+(.*)$/,
init: function(state) {
state.forCurrLevel = 0; // current level of #for loop nesting
state.forMaxLevel = 0; // maximum level of #for loop nesting
},
compile: function(state, prefix, params) {
var c = '__c' + state.forCurrLevel, // __c for "collection"
l = '__l' + state.forCurrLevel, // __l for "length"
i = '__i' + state.forCurrLevel; // __i for "index"
state.forCurrLevel++;
if (state.forMaxLevel < state.forCurrLevel) {
state.forMaxLevel = state.forCurrLevel;
}
return [
c + '=' + params[1] + ';'
+ l + '=' + c + '.length;'
+ 'for(' + i + '=0;' + i + '<' + l + ';' + i + '++){'
+ params[0] + '=' + c + '[' + i + '];',
[params[0], c, l, i]
];
},
exit: function(state) { state.forCurrLevel--; },
stackOp: "push"
},
"end": {
params: /^$/,
compile: function(state) {
var stack = state.commandStack, exit;
if (stack.length === 0) { throw new Error("Too many #ends."); }
/* Run the |exit| hook of the command being closed, if it has one. */
exit = Codie.commands[stack[stack.length - 1]].exit;
if (exit) { exit(state); }
return ['}', []];
},
stackOp: "pop"
},
"block": {
params: /^(.*)$/,
compile: function(state, prefix, params) {
var x = '__x', // __x for "prefix",
n = '__n', // __n for "lines"
l = '__l', // __l for "length"
i = '__i'; // __i for "index"
/*
* Originally, the generated code used |String.prototype.replace|, but
* it is buggy in certain versions of V8 so it was rewritten. See the
* tests for details.
*/
return [
x + '="' + stringEscape(prefix.substring(state.indentLevel())) + '";'
+ n + '=(' + params[0] + ').toString().split("\\n");'
+ l + '=' + n + '.length;'
+ 'for(' + i + '=0;' + i + '<' + l + ';' + i + '++){'
+ n + '[' + i +']=' + x + '+' + n + '[' + i + ']+"\\n";'
+ '}'
+ push(n + '.join("")'),
[x, n, l, i]
];
},
stackOp: "nop"
}
},
/*
* Compiles a template into a function. When called, this function will
* execute the template in the context of an object passed in a parameter and
* return the result.
*
* Throws an |Error| for unknown commands, invalid command parameters and a
* missing #end.
*/
template: function(template) {
/* Ways a command may update the stack of currently open commands. */
var stackOps = {
push: function(stack, name) { stack.push(name); },
replace: function(stack, name) { stack[stack.length - 1] = name; },
pop: function(stack) { stack.pop(); },
nop: function() { }
};
/* Compiles a #{...} expression. */
function compileExpr(state, expr) {
state.atBOL = false;
return [push(expr), []];
}
/* Compiles a #-command line. */
function compileCommand(state, prefix, name, params) {
var command, match, result;
command = Codie.commands[name];
if (!command) { throw new Error("Unknown command: #" + name + "."); }
match = command.params.exec(params);
if (match === null) {
throw new Error(
"Invalid params for command #" + name + ": " + params + "."
);
}
result = command.compile(state, prefix, match.slice(1));
stackOps[command.stackOp](state.commandStack, name);
state.atBOL = true;
return result;
}
var state = { // compilation state
commandStack: [], // stack of commands as they were nested
atBOL: true, // is the next character to process at BOL?
indentLevel: function() {
return Codie.indentStep * this.commandStack.length;
}
},
code = '', // generated template function code
vars = ['__p=[]'], // variables used by generated code
name, match, result, i;
/* Initialize state. */
for (name in Codie.commands) {
if (Codie.commands[name].init) { Codie.commands[name].init(state); }
}
/*
* Compile the template. Each iteration emits the raw text preceding the
* next #-command or #{...} expression, then the command/expression itself,
* and continues with the rest of the template.
*/
while ((match = /^([ \t]*)#([a-zA-Z_][a-zA-Z0-9_]*)(?:[ \t]+([^ \t\n][^\n]*))?[ \t]*(?:\n|$)|#\{([^}]*)\}/m.exec(template)) !== null) {
code += pushRaw(template, match.index, state);
result = match[2] !== undefined && match[2] !== ""
? compileCommand(state, match[1], match[2], match[3] || "") // #-command
: compileExpr(state, match[4]); // #{...}
code += result[0];
vars = vars.concat(result[1]);
template = template.substring(match.index + match[0].length);
}
code += pushRaw(template, template.length, state);
/* Check the final state. */
if (state.commandStack.length > 0) { throw new Error("Missing #end."); }
/* Sanitize the list of variables used by commands (sort, drop duplicates). */
vars.sort();
for (i = 0; i < vars.length; i++) {
if (vars[i] === vars[i - 1]) { vars.splice(i--, 1); }
}
/*
* Create the resulting function. The template code runs inside
* |with(__v)|, so properties of the passed object are visible to it as
* plain identifiers.
*/
return new Function("__v", [
'__v=__v||{};',
'var ' + vars.join(',') + ';',
'with(__v){',
code,
'return __p.join("").replace(/^\\n+|\\n+$/g,"");};'
].join(''));
}
};
return Codie;
})();
var templates = (function() {
var name,
templates = {},
sources = {
grammar: [
'(function(){',
' /*',
' * Generated by PEG.js 0.7.0.',
' *',
' * http://pegjs.majda.cz/',
' */',
' ',
/* This needs to be in sync with |subclass| in utils.js. */
' function subclass(child, parent) {',
' function ctor() { this.constructor = child; }',
' ctor.prototype = parent.prototype;',
' child.prototype = new ctor();',
' }',
' ',
/* This needs to be in sync with |quote| in utils.js. */
' function quote(s) {',
' /*',
' * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a',
' * string literal except for the closing quote character, backslash,',
' * carriage return, line separator, paragraph separator, and line feed.',
' * Any character may appear in the form of an escape sequence.',
' *',
' * For portability, we also escape escape all control and non-ASCII',
' * characters. Note that "\\0" and "\\v" escape sequences are not used',
' * because JSHint does not like the first and IE the second.',
' */',
' return \'"\' + s',
' .replace(/\\\\/g, \'\\\\\\\\\') // backslash',
' .replace(/"/g, \'\\\\"\') // closing quote character',
' .replace(/\\x08/g, \'\\\\b\') // backspace',
' .replace(/\\t/g, \'\\\\t\') // horizontal tab',
' .replace(/\\n/g, \'\\\\n\') // line feed',
' .replace(/\\f/g, \'\\\\f\') // form feed',
' .replace(/\\r/g, \'\\\\r\') // carriage return',
' .replace(/[\\x00-\\x07\\x0B\\x0E-\\x1F\\x80-\\uFFFF]/g, escape)',
' + \'"\';',
' }',
' ',
' var result = {',
' /*',
' * Parses the input with a generated parser. If the parsing is successful,',
' * returns a value explicitly or implicitly specified by the grammar from',
' * which the parser was generated (see |PEG.buildParser|). If the parsing is',
' * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.',
' */',
' parse: function(input) {',
' var parseFunctions = {',
' #for rule in options.allowedStartRules',
' #{string(rule) + ": parse_" + rule + (rule !== options.allowedStartRules[options.allowedStartRules.length - 1] ? "," : "")}',
' #end',
' };',
' ',
' var options = arguments.length > 1 ? arguments[1] : {},',
' startRule;',
' ',
' if (options.startRule !== undefined) {',
' startRule = options.startRule;',
' ',
' if (parseFunctions[startRule] === undefined) {',
' throw new Error("Can\'t start parsing from rule " + quote(startRule) + ".");',
' }',
' } else {',
' startRule = #{string(options.allowedStartRules[0])};',
' }',
' ',
' var pos = 0;',
' var reportedPos = 0;',
' var cachedReportedPos = 0;',
' var cachedReportedPosDetails = { line: 1, column: 1, seenCR: false };',
' var reportFailures = 0;', // 0 = report, anything > 0 = do not report
' var rightmostFailuresPos = 0;',
' var rightmostFailuresExpected = [];',
' #if options.cache',
' var cache = {};',
' #end',
' ',
/* This needs to be in sync with |padLeft| in utils.js. */
' function padLeft(input, padding, length) {',
' var result = input;',
' ',
' var padLength = length - input.length;',
' for (var i = 0; i < padLength; i++) {',
' result = padding + result;',
' }',
' ',
' return result;',
' }',
' ',
/* This needs to be in sync with |escape| in utils.js. */
' function escape(ch) {',
' var charCode = ch.charCodeAt(0);',
' var escapeChar;',
' var length;',
' ',
' if (charCode <= 0xFF) {',
' escapeChar = \'x\';',
' length = 2;',
' } else {',
' escapeChar = \'u\';',
' length = 4;',
' }',
' ',
' return \'\\\\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), \'0\', length);',
' }',
' ',
' function computeReportedPosDetails() {',
' function advanceCachedReportedPos() {',
' var ch;',
' ',
' for (; cachedReportedPos < reportedPos; cachedReportedPos++) {',
' ch = input.charAt(cachedReportedPos);',
' if (ch === "\\n") {',
' if (!cachedReportedPosDetails.seenCR) { cachedReportedPosDetails.line++; }',
' cachedReportedPosDetails.column = 1;',
' cachedReportedPosDetails.seenCR = false;',
' } else if (ch === "\\r" || ch === "\\u2028" || ch === "\\u2029") {',
' cachedReportedPosDetails.line++;',
' cachedReportedPosDetails.column = 1;',
' cachedReportedPosDetails.seenCR = true;',
' } else {',
' cachedReportedPosDetails.column++;',
' cachedReportedPosDetails.seenCR = false;',
' }',
' }',
' }',
' ',
' if (cachedReportedPos !== reportedPos) {',
' if (cachedReportedPos > reportedPos) {',
' cachedReportedPos = 0;',
' cachedReportedPosDetails = { line: 1, column: 1, seenCR: false };',
' }',
' advanceCachedReportedPos();',
' }',
' ',
' return cachedReportedPosDetails;',
' }',
' ',
' function text() {',
' return input.substring(reportedPos, pos);',
' }',
' ',
' function offset() {',
' return reportedPos;',
' }',
' ',
' function line() {',
' return computeReportedPosDetails().line;',
' }',
' ',
' function column() {',
' return computeReportedPosDetails().column;',
' }',
' ',
' function matchFailed(failure) {',
' if (pos < rightmostFailuresPos) {',
' return;',
' }',
' ',
' if (pos > rightmostFailuresPos) {',
' rightmostFailuresPos = pos;',
' rightmostFailuresExpected = [];',
' }',
' ',
' rightmostFailuresExpected.push(failure);',
' }',
' ',
' #for rule in node.rules',
' #block emit(rule)',
' ',
' #end',
' ',
' function cleanupExpected(expected) {',
' expected.sort();',
' ',
' var lastExpected = null;',
' var cleanExpected = [];',
' for (var i = 0; i < expected.length; i++) {',
' if (expected[i] !== lastExpected) {',
' cleanExpected.push(expected[i]);',
' lastExpected = expected[i];',
' }',
' }',
' return cleanExpected;',
' }',
' ',
' #if node.initializer',
' #block emit(node.initializer)',
' #end',
' ',
' var result = parseFunctions[startRule]();',
' ',
' /*',
' * The parser is now in one of the following three states:',
' *',
' * 1. The parser successfully parsed the whole input.',
' *',
' * - |result !== null|',
' * - |pos === input.length|',
' * - |rightmostFailuresExpected| may or may not contain something',
' *',
' * 2. The parser successfully parsed only a part of the input.',
' *',
' * - |result !== null|',
' * - |pos < input.length|',
' * - |rightmostFailuresExpected| may or may not contain something',
' *',
' * 3. The parser did not successfully parse any part of the input.',
' *',
' * - |result === null|',
' * - |pos === 0|',
' * - |rightmostFailuresExpected| contains at least one failure',
' *',
' * All code following this comment (including called functions) must',
' * handle these states.',
' */',
' if (result === null || pos !== input.length) {',
' reportedPos = Math.max(pos, rightmostFailuresPos);',
' var found = reportedPos < input.length ? input.charAt(reportedPos) : null;',
' var reportedPosDetails = computeReportedPosDetails();',
' ',
' throw new this.SyntaxError(',
' cleanupExpected(rightmostFailuresExpected),',
' found,',
' reportedPos,',
' reportedPosDetails.line,',
' reportedPosDetails.column',
' );',
' }',
' ',
' return result;',
' }',
' };',
' ',
' /* Thrown when a parser encounters a syntax error. */',
' ',
' result.SyntaxError = function(expected, found, offset, line, column) {',
' function buildMessage(expected, found) {',
' var expectedHumanized, foundHumanized;',
' ',
' switch (expected.length) {',
' case 0:',
' expectedHumanized = "end of input";',
' break;',
' case 1:',
' expectedHumanized = expected[0];',
' break;',
' default:',
' expectedHumanized = expected.slice(0, expected.length - 1).join(", ")',
' + " or "',
' + expected[expected.length - 1];',
' }',
' ',
' foundHumanized = found ? quote(found) : "end of input";',
' ',
' return "Expected " + expectedHumanized + " but " + foundHumanized + " found.";',
' }',
' ',
' this.name = "SyntaxError";',
' this.expected = expected;',
' this.found = found;',
' this.message = buildMessage(expected, found);',
' this.offset = offset;',
' this.line = line;',
' this.column = column;',
' };',
' ',
' subclass(result.SyntaxError, Error);',
' ',
' return result;',
'})()'
],
rule: [
'function parse_#{node.name}() {',
' #if options.cache',
' var cacheKey = "#{node.name}@" + pos;',
' var cachedResult = cache[cacheKey];',
' if (cachedResult) {',
' pos = cachedResult.nextPos;',
' return cachedResult.result;',
' }',
' ',
' #end',
' #if node.registerCount > 0',
' var #{map(range(node.registerCount), r).join(", ")};',
' #end',
' ',
' #block emit(node.expression)',
' #if options.cache',
' ',
' cache[cacheKey] = {',
' nextPos: pos,',
' result: #{r(node.expression.resultIndex)}',
' };',
' #end',
' return #{r(node.expression.resultIndex)};',
'}'
],
named: [
'reportFailures++;',
'#block emit(node.expression)',
'reportFailures--;',
'if (reportFailures === 0 && #{r(node.resultIndex)} === null) {',
' matchFailed(#{string(node.name)});',
'}'
],
choice: [
'#block emit(alternative)',
'#block nextAlternativesCode'
],
"choice.next": [
'if (#{r(node.resultIndex)} === null) {',
' #block code',
'}'
],
action: [
'#{r(node.posIndex)} = pos;',
'#block emit(node.expression)',
'if (#{r(node.resultIndex)} !== null) {',
' reportedPos = #{r(node.posIndex)};',
' #{r(node.resultIndex)} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{map(values(node.params), r).join(", ")});',
'}',
'if (#{r(node.resultIndex)} === null) {',
' pos = #{r(node.posIndex)};',
'}'
],
sequence: [
'#{r(node.posIndex)} = pos;',
'#block code'
],
"sequence.iteration": [
'#block emit(element)',
'if (#{r(element.resultIndex)} !== null) {',
' #block code',
'} else {',
' #{r(node.resultIndex)} = null;',
' pos = #{r(node.posIndex)};',
'}'
],
"sequence.inner": [
'#{r(node.resultIndex)} = [#{map(pluck(node.elements, "resultIndex"), r).join(", ")}];'
],
text: [
'#{r(node.posIndex)} = pos;',
'#block emit(node.expression)',
'if (#{r(node.resultIndex)} !== null) {',
' #{r(node.resultIndex)} = input.substring(pos, #{r(node.posIndex)});',
'}'
],
simple_and: [
'#{r(node.posIndex)} = pos;',
'reportFailures++;',
'#block emit(node.expression)',
'reportFailures--;',
'if (#{r(node.resultIndex)} !== null) {',
' #{r(node.resultIndex)} = "";',
' pos = #{r(node.posIndex)};',
'} else {',
' #{r(node.resultIndex)} = null;',
'}'
],
simple_not: [
'#{r(node.posIndex)} = pos;',
'reportFailures++;',
'#block emit(node.expression)',
'reportFailures--;',
'if (#{r(node.resultIndex)} === null) {',
' #{r(node.resultIndex)} = "";',
'} else {',
' #{r(node.resultIndex)} = null;',
' pos = #{r(node.posIndex)};',
'}'
],
semantic_and: [
'reportedPos = pos;',
'#{r(node.resultIndex)} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{map(values(node.params), r).join(", ")}) ? "" : null;'
],
semantic_not: [
'reportedPos = pos;',
'#{r(node.resultIndex)} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{map(values(node.params), r).join(", ")}) ? null : "";'
],
optional: [
'#block emit(node.expression)',
'#{r(node.resultIndex)} = #{r(node.resultIndex)} !== null ? #{r(node.resultIndex)} : "";'
],
zero_or_more: [
'#{r(node.resultIndex)} = [];',
'#block emit(node.expression)',
'while (#{r(node.expression.resultIndex)} !== null) {',
' #{r(node.resultIndex)}.push(#{r(node.expression.resultIndex)});',
' #block emit(node.expression)',
'}'
],
one_or_more: [
'#block emit(node.expression)',
'if (#{r(node.expression.resultIndex)} !== null) {',
' #{r(node.resultIndex)} = [];',
' while (#{r(node.expression.resultIndex)} !== null) {',
' #{r(node.resultIndex)}.push(#{r(node.expression.resultIndex)});',
' #block emit(node.expression)',
' }',
'} else {',
' #{r(node.resultIndex)} = null;',
'}'
],
rule_ref: [
'#{r(node.resultIndex)} = parse_#{node.name}();'
],
literal: [
'#if node.value.length === 0',
' #{r(node.resultIndex)} = "";',
'#else',
' #if !node.ignoreCase',
' #if node.value.length === 1',
' if (input.charCodeAt(pos) === #{node.value.charCodeAt(0)}) {',
' #else',
' if (input.substr(pos, #{node.value.length}) === #{string(node.value)}) {',
' #end',
' #else',
/*
* One-char literals are not optimized when case-insensitive
* matching is enabled. This is because there is no simple way to
* lowercase a character code that works for character outside ASCII
* letters. Moreover, |toLowerCase| can change string length,
* meaning the result of lowercasing a character can be more
* characters.
*/
' if (input.substr(pos, #{node.value.length}).toLowerCase() === #{string(node.value.toLowerCase())}) {',
' #end',
' #if !node.ignoreCase',
' #{r(node.resultIndex)} = #{string(node.value)};',
' #else',
' #{r(node.resultIndex)} = input.substr(pos, #{node.value.length});',
' #end',
' #{node.value.length > 1 ? "pos += " + node.value.length : "pos++"};',
' } else {',
' #{r(node.resultIndex)} = null;',
' if (reportFailures === 0) {',
' matchFailed(#{string(string(node.value))});',
' }',
' }',
'#end'
],
"class": [
'if (#{regexp}.test(input.charAt(pos))) {',
' #{r(node.resultIndex)} = input.charAt(pos);',
' pos++;',
'} else {',
' #{r(node.resultIndex)} = null;',
' if (reportFailures === 0) {',
' matchFailed(#{string(node.rawText)});',
' }',
'}'
],
any: [
'if (input.length > pos) {',
' #{r(node.resultIndex)} = input.charAt(pos);',
' pos++;',
'} else {',
' #{r(node.resultIndex)} = null;',
' if (reportFailures === 0) {',
' matchFailed("any character");',
' }',
'}'
]
};
for (name in sources) {
templates[name] = Codie.template(sources[name].join('\n'));
}
return templates;
})();
/*
 * Renders the template called |name|. Before rendering, |vars| is extended
 * (in place) with the helpers and shared values every template may use.
 */
function fill(name, vars) {
  /* Maps a register index to the name of the variable holding it. */
  function registerName(index) { return "r" + index; }

  var render = templates[name];

  /* Shared values. */
  vars.options = options;
  vars.emit    = emit;

  /* Helpers. */
  vars.r      = registerName;
  vars.string = utils.quote;
  vars.keys   = utils.keys;
  vars.values = utils.values;
  vars.pluck  = utils.pluck;
  vars.map    = utils.map;
  vars.range  = utils.range;

  return render(vars);
}
/*
 * Creates an emitter for nodes whose code is produced by filling the template
 * called |name| with nothing but the node itself.
 */
function emitSimple(name) {
  return function(node) {
    var vars = { node: node };

    return fill(name, vars);
  };
}
/* Node visitor that returns the generated code for an AST node. */
var emit = utils.buildNodeVisitor({
  grammar: emitSimple("grammar"),

  initializer: function(node) { return node.code; },

  rule: emitSimple("rule"),

  /*
   * Contract shared by the code fragments produced by all functions below:
   *
   * The fragment tries to match a part of the input beginning at the position
   * stored in |pos|. That position may lie past the end of the input.
   *
   *   * On a match, the fragment moves |pos| to the first character following
   *     the matched part of the input and stores an appropriate value in the
   *     register whose index is |node.resultIndex|. That value is never
   *     |null|.
   *
   *   * On a mismatch, the fragment leaves |pos| at its original value and
   *     stores |null| in the register whose index is |node.resultIndex|.
   *
   * The fragment touches only the registers whose indices are given by
   * |node.resultIndex| and |node.posIndex|, where |node| is the processed
   * node or one of its subnodes. No other registers are used.
   */

  named: emitSimple("named"),

  /* Alternatives are emitted last-to-first, each wrapping the ones after it. */
  choice: function(node) {
    var alternatives = node.alternatives,
        last = alternatives.length - 1,
        code, rest, index;

    for (index = last; index >= 0; index--) {
      if (index !== last) {
        rest = fill("choice.next", { node: node, code: code });
      } else {
        rest = '';
      }

      code = fill("choice", {
        alternative: alternatives[index],
        nextAlternativesCode: rest
      });
    }

    return code;
  },

  action: emitSimple("action"),

  /* Elements are wrapped around the innermost code, last element first. */
  sequence: function(node) {
    var elements = node.elements,
        code = fill("sequence.inner", { node: node }),
        index = elements.length;

    while (index-- > 0) {
      code = fill("sequence.iteration", {
        node: node,
        element: elements[index],
        code: code
      });
    }

    return fill("sequence", { node: node, code: code });
  },

  labeled: function(node) { return emit(node.expression); },

  text: emitSimple("text"),
  simple_and: emitSimple("simple_and"),
  simple_not: emitSimple("simple_not"),
  semantic_and: emitSimple("semantic_and"),
  semantic_not: emitSimple("semantic_not"),
  optional: emitSimple("optional"),
  zero_or_more: emitSimple("zero_or_more"),
  one_or_more: emitSimple("one_or_more"),
  rule_ref: emitSimple("rule_ref"),
  literal: emitSimple("literal"),

  /* Builds the regexp literal for the class and hands it to the template. */
  "class": function(node) {
    var regexp, negation, flags, members;

    function quotePart(part) {
      if (part instanceof Array) {
        return utils.quoteForRegexpClass(part[0])
          + '-'
          + utils.quoteForRegexpClass(part[1]);
      }

      return utils.quoteForRegexpClass(part);
    }

    if (node.parts.length > 0) {
      negation = node.inverted ? '^' : '';
      members = utils.map(node.parts, quotePart).join('');
      flags = node.ignoreCase ? 'i' : '';

      regexp = '/^[' + negation + members + ']/' + flags;
    } else {
      /*
       * IE treats the regexps /[]/ and /[^]/ as syntactically invalid, so
       * they are replaced by equivalents it can handle.
       */
      regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
    }

    return fill("class", { node: node, regexp: regexp });
  },

  any: emitSimple("any")
});
ast.code = emit(ast);
};

@ -0,0 +1,950 @@
var utils = require("../../utils"),
op = require("../opcodes");
/* Generates parser JavaScript code. */
module.exports = function(ast, options) {
options = utils.clone(options);
utils.defaults(options, {
cache: false,
allowedStartRules: [ast.startRule],
optimize: "speed"
});
/*
 * Indents every non-empty line of |code| by |width| spaces. Empty lines are
 * left untouched to avoid trailing whitespace. The prefix is built from the
 * numeric width so that each |indentN| helper below is guaranteed to indent
 * by exactly N spaces.
 */
function indentBy(code, width) {
  var prefix = new Array(width + 1).join(' ');

  return code.replace(/^(.+)$/gm, prefix + '$1');
}

/* These only indent non-empty lines to avoid trailing whitespace. */
function indent2(code) { return indentBy(code, 2); }
function indent4(code) { return indentBy(code, 4); }
function indent8(code) { return indentBy(code, 8); }
function indent10(code) { return indentBy(code, 10); }
/*
 * Generates the declarations of the constant table and, when optimizing for
 * size, of the bytecode table. When not optimizing for size, every constant
 * gets its own |peg$c<i>| variable instead.
 */
function generateTables() {
  /* Encodes a rule's bytecode as a |peg$decode| call on a quoted string. */
  function encodeRule(rule) {
    var chars = utils.map(
      rule.bytecode,
      function(b) { return String.fromCharCode(b + 32); }
    );

    return 'peg$decode(' + utils.quote(chars.join('')) + ')';
  }

  /* Declares one constant as a standalone variable. */
  function declareConst(c, i) {
    return 'peg$c' + i + ' = ' + c + ',';
  }

  if (options.optimize !== "size") {
    return utils.map(ast.consts, declareConst).join('\n');
  }

  return [
    'peg$consts = [',
    indent2(ast.consts.join(',\n')),
    '],',
    '',
    'peg$bytecode = [',
    indent2(utils.map(ast.rules, encodeRule).join(',\n')),
    '],'
  ].join('\n');
}
/*
 * Generates the code that looks a rule result up in |peg$cache| and returns
 * it right away when present. |ruleIndexCode| is the code of an expression
 * yielding the rule index; it is combined with |peg$currPos| into the key.
 */
function generateCacheHeader(ruleIndexCode) {
  var lines = [];

  lines.push('var key = peg$currPos * ' + ast.rules.length + ' + ' + ruleIndexCode + ',');
  lines.push(' cached = peg$cache[key];');
  lines.push('');
  lines.push('if (cached) {');
  lines.push(' peg$currPos = cached.nextPos;');
  lines.push(' return cached.result;');
  lines.push('}');
  lines.push('');

  return lines.join('\n');
}
/*
 * Generates the code that stores a rule result in |peg$cache|, preceded by an
 * empty line. |resultCode| is the code of the expression holding the result.
 */
function generateCacheFooter(resultCode) {
  var store = 'peg$cache[key] = { nextPos: peg$currPos, result: ' + resultCode + ' };';

  return '\n' + store;
}
/*
 * Generates the source of the generic bytecode interpreter: the |peg$decode|
 * helper and the |peg$parseRule| function. The pieces are collected in |parts|
 * and joined with newlines. When |options.cache| is set, the cache header and
 * footer are spliced into |peg$parseRule|.
 */
function generateInterpreter() {
var parts = [];
/*
 * Emits interpreter code for a conditional instruction. |cond| is the code of
 * the condition to test and |argsLength| the number of instruction arguments
 * that precede the two length operands (then-length, else-length). The
 * emitted code saves |end| and the address following both branches, then
 * narrows |ip| and |end| to the selected branch.
 */
function generateCondition(cond, argsLength) {
var baseLength = argsLength + 3,
thenLengthCode = 'bc[ip + ' + (baseLength - 2) + ']',
elseLengthCode = 'bc[ip + ' + (baseLength - 1) + ']';
return [
'ends.push(end);',
'ips.push(ip + ' + baseLength + ' + ' + thenLengthCode + ' + ' + elseLengthCode + ');',
'',
'if (' + cond + ') {',
' end = ip + ' + baseLength + ' + ' + thenLengthCode + ';',
' ip += ' + baseLength + ';',
'} else {',
' end = ip + ' + baseLength + ' + ' + thenLengthCode + ' + ' + elseLengthCode + ';',
' ip += ' + baseLength + ' + ' + thenLengthCode + ';',
'}',
'',
'break;'
].join('\n');
}
/*
 * Emits interpreter code for a loop instruction whose body length is stored
 * right after the opcode. While |cond| holds, the emitted code saves |end| and
 * the address of the loop instruction itself (so the condition is evaluated
 * again after the body) and enters the body; otherwise it skips the body.
 */
function generateLoop(cond) {
var baseLength = 2,
bodyLengthCode = 'bc[ip + ' + (baseLength - 1) + ']';
return [
'if (' + cond + ') {',
' ends.push(end);',
' ips.push(ip);',
'',
' end = ip + ' + baseLength + ' + ' + bodyLengthCode + ';',
' ip += ' + baseLength + ';',
'} else {',
' ip += ' + baseLength + ' + ' + bodyLengthCode + ';',
'}',
'',
'break;'
].join('\n');
}
/*
 * Emits interpreter code for |CALL|: the parameter operands are offsets from
 * the top of the stack and are replaced by the stack values they refer to;
 * the function constant is then called and its result replaces the top
 * |bc[ip + 2]| stack entries.
 */
function generateCall() {
var baseLength = 4,
paramsLengthCode = 'bc[ip + ' + (baseLength - 1) + ']';
return [
'params = bc.slice(ip + ' + baseLength + ', ip + ' + baseLength + ' + ' + paramsLengthCode + ');',
'for (i = 0; i < ' + paramsLengthCode + '; i++) {',
' params[i] = stack[stack.length - 1 - params[i]];',
'}',
'',
'stack.splice(',
' stack.length - bc[ip + 2],',
' bc[ip + 2],',
' peg$consts[bc[ip + 1]].apply(null, params)',
');',
'',
'ip += ' + baseLength + ' + ' + paramsLengthCode + ';',
'break;'
].join('\n');
}
/* |peg$decode| and the head of |peg$parseRule| with its local state. */
parts.push([
'function peg$decode(s) {',
' var bc = new Array(s.length), i;',
'',
' for (i = 0; i < s.length; i++) {',
' bc[i] = s.charCodeAt(i) - 32;',
' }',
'',
' return bc;',
'}',
'',
'function peg$parseRule(index) {',
' var bc = peg$bytecode[index],',
' ip = 0,',
' ips = [],',
' end = bc.length,',
' ends = [],',
' stack = [],',
' params, i;',
''
].join('\n'));
if (options.cache) {
parts.push(indent2(generateCacheHeader('index')));
}
/* The interpreter loop: one |case| per opcode. */
parts.push([
' function protect(object) {',
' return Object.prototype.toString.apply(object) === "[object Array]" ? [] : object;',
' }',
'',
/*
* The point of the outer loop and the |ips| & |ends| stacks is to avoid
* recursive calls for interpreting parts of bytecode. In other words, we
* implement the |interpret| operation of the abstract machine without
* function calls. Such calls would likely slow the parser down and more
* importantly cause stack overflows for complex grammars.
*/
' while (true) {',
' while (ip < end) {',
' switch (bc[ip]) {',
' case ' + op.PUSH + ':', // PUSH c
/*
* Hack: One of the constants can be an empty array. It needs to be cloned
* because it can be modified later on the stack by |APPEND|.
*/
' stack.push(protect(peg$consts[bc[ip + 1]]));',
' ip += 2;',
' break;',
'',
' case ' + op.PUSH_CURR_POS + ':', // PUSH_CURR_POS
' stack.push(peg$currPos);',
' ip++;',
' break;',
'',
' case ' + op.POP + ':', // POP
' stack.pop();',
' ip++;',
' break;',
'',
' case ' + op.POP_CURR_POS + ':', // POP_CURR_POS
' peg$currPos = stack.pop();',
' ip++;',
' break;',
'',
' case ' + op.POP_N + ':', // POP_N n
' stack.length -= bc[ip + 1];',
' ip += 2;',
' break;',
'',
' case ' + op.NIP + ':', // NIP
' stack.splice(-2, 1);',
' ip++;',
' break;',
'',
' case ' + op.NIP_CURR_POS + ':', // NIP_CURR_POS
' peg$currPos = stack.splice(-2, 1)[0];',
' ip++;',
' break;',
'',
' case ' + op.APPEND + ':', // APPEND
' stack[stack.length - 2].push(stack.pop());',
' ip++;',
' break;',
'',
' case ' + op.WRAP + ':', // WRAP n
' stack.push(stack.splice(stack.length - bc[ip + 1]));',
' ip += 2;',
' break;',
'',
' case ' + op.TEXT + ':', // TEXT
' stack.pop();',
' stack.push(input.substring(stack[stack.length - 1], peg$currPos));',
' ip++;',
' break;',
'',
' case ' + op.IF + ':', // IF t, f
indent10(generateCondition('stack[stack.length - 1]', 0)),
'',
' case ' + op.IF_ERROR + ':', // IF_ERROR t, f
indent10(generateCondition(
'stack[stack.length - 1] === null',
0
)),
'',
' case ' + op.IF_NOT_ERROR + ':', // IF_NOT_ERROR t, f
indent10(
generateCondition('stack[stack.length - 1] !== null',
0
)),
'',
' case ' + op.WHILE_NOT_ERROR + ':', // WHILE_NOT_ERROR b
indent10(generateLoop('stack[stack.length - 1] !== null')),
'',
' case ' + op.MATCH_ANY + ':', // MATCH_ANY a, f, ...
indent10(generateCondition('input.length > peg$currPos', 0)),
'',
' case ' + op.MATCH_STRING + ':', // MATCH_STRING s, a, f, ...
indent10(generateCondition(
'input.substr(peg$currPos, peg$consts[bc[ip + 1]].length) === peg$consts[bc[ip + 1]]',
1
)),
'',
' case ' + op.MATCH_STRING_IC + ':', // MATCH_STRING_IC s, a, f, ...
indent10(generateCondition(
'input.substr(peg$currPos, peg$consts[bc[ip + 1]].length).toLowerCase() === peg$consts[bc[ip + 1]]',
1
)),
'',
' case ' + op.MATCH_REGEXP + ':', // MATCH_REGEXP r, a, f, ...
indent10(generateCondition(
'peg$consts[bc[ip + 1]].test(input.charAt(peg$currPos))',
1
)),
'',
' case ' + op.ACCEPT_N + ':', // ACCEPT_N n
' stack.push(input.substr(peg$currPos, bc[ip + 1]));',
' peg$currPos += bc[ip + 1];',
' ip += 2;',
' break;',
'',
' case ' + op.ACCEPT_STRING + ':', // ACCEPT_STRING s
' stack.push(peg$consts[bc[ip + 1]]);',
' peg$currPos += peg$consts[bc[ip + 1]].length;',
' ip += 2;',
' break;',
'',
' case ' + op.FAIL + ':', // FAIL e
' stack.push(null);',
' if (peg$silentFails === 0) {',
' peg$fail(peg$consts[bc[ip + 1]]);',
' }',
' ip += 2;',
' break;',
'',
' case ' + op.REPORT_SAVED_POS + ':', // REPORT_SAVED_POS p
' peg$reportedPos = stack[stack.length - 1 - bc[ip + 1]];',
' ip += 2;',
' break;',
'',
' case ' + op.REPORT_CURR_POS + ':', // REPORT_CURR_POS
' peg$reportedPos = peg$currPos;',
' ip++;',
' break;',
'',
' case ' + op.CALL + ':', // CALL f, n, pc, p1, p2, ..., pN
indent10(generateCall()),
'',
' case ' + op.RULE + ':', // RULE r
' stack.push(peg$parseRule(bc[ip + 1]));',
' ip += 2;',
' break;',
'',
' case ' + op.SILENT_FAILS_ON + ':', // SILENT_FAILS_ON
' peg$silentFails++;',
' ip++;',
' break;',
'',
' case ' + op.SILENT_FAILS_OFF + ':', // SILENT_FAILS_OFF
' peg$silentFails--;',
' ip++;',
' break;',
'',
' default:',
' throw new Error("Invalid opcode: " + bc[ip] + ".");',
' }',
' }',
'',
/* Resume the enclosing bytecode section, or finish when there is none. */
' if (ends.length > 0) {',
' end = ends.pop();',
' ip = ips.pop();',
' } else {',
' break;',
' }',
' }'
].join('\n'));
if (options.cache) {
parts.push(indent2(generateCacheFooter('stack[0]')));
}
/* The rule result is whatever is left at the bottom of the stack. */
parts.push([
'',
' return stack[0];',
'}'
].join('\n'));
return parts.join('\n');
}
/*
 * Generates the source of the |peg$parse<name>| function for |rule| by
 * compiling the rule's bytecode into JavaScript statements.
 *
 * The stack of the abstract machine is mapped to local variables |s0|, |s1|,
 * ... of the generated function; the result of the rule ends up in |s0|. When
 * |options.cache| is set, the generated function also looks its result up in
 * and stores it into the cache.
 *
 * Throws an |Error| when the bytecode contains an unknown opcode.
 */
function generateRuleFunction(rule) {
  var parts = [], code;

  function c(i) { return "peg$c" + i; } // |consts[i]| of the abstract machine
  function s(i) { return "s" + i; }     // |stack[i]| of the abstract machine

  /*
   * Returns the value of the string constant |consts[i]|. The constant table
   * holds JavaScript source text (e.g. a quoted string literal), so the text
   * has to be evaluated to obtain the actual string.
   *
   * NOTE(review): |eval| is applied to constants produced by the bytecode
   * generator, not to external input; keep it that way.
   */
  function constValue(i) { return eval(ast.consts[i]); }

  /* Compile-time model of the abstract machine's stack. */
  var stack = {
    sp: -1,    // index of the current top of the stack (-1 = empty)
    maxSp: -1, // highest value |sp| ever reached

    /* Returns a statement storing |exprCode| in a new top stack slot. */
    push: function(exprCode) {
      var code = s(++this.sp) + ' = ' + exprCode + ';';

      if (this.sp > this.maxSp) { this.maxSp = this.sp; }

      return code;
    },

    /*
     * Without arguments pops one slot and returns its variable name. With an
     * argument |n| pops |n| slots and returns their names, bottom first.
     */
    pop: function() {
      var n, values;

      if (arguments.length === 0) {
        return s(this.sp--);
      } else {
        n = arguments[0];
        values = utils.map(utils.range(this.sp - n + 1, this.sp + 1), s);
        this.sp -= n;

        return values;
      }
    },

    /* Name of the variable holding the top of the stack. */
    top: function() {
      return s(this.sp);
    },

    /* Name of the variable |i| slots below the top of the stack. */
    index: function(i) {
      return s(this.sp - i);
    }
  };

  /* Compiles the bytecode section |bc| and returns the resulting code. */
  function compile(bc) {
    var ip = 0,
        end = bc.length,
        parts = [],
        value, str;

    /*
     * Compiles a conditional instruction with |argCount| arguments preceding
     * the then/else length operands. Both branches start from the same stack
     * depth.
     */
    function compileCondition(cond, argCount) {
      var baseLength = argCount + 3,
          thenLength = bc[ip + baseLength - 2],
          elseLength = bc[ip + baseLength - 1],
          baseSp = stack.sp,
          thenCode, elseCode;

      ip += baseLength;
      thenCode = compile(bc.slice(ip, ip + thenLength));
      ip += thenLength;

      if (elseLength > 0) {
        stack.sp = baseSp;
        elseCode = compile(bc.slice(ip, ip + elseLength));
        ip += elseLength;
      }

      parts.push('if (' + cond + ') {');
      parts.push(indent2(thenCode));
      if (elseLength > 0) {
        parts.push('} else {');
        parts.push(indent2(elseCode));
      }
      parts.push('}');
    }

    /* Compiles a loop instruction; the body length follows the opcode. */
    function compileLoop(cond) {
      var baseLength = 2,
          bodyLength = bc[ip + baseLength - 1],
          bodyCode;

      ip += baseLength;
      bodyCode = compile(bc.slice(ip, ip + bodyLength));
      ip += bodyLength;

      parts.push('while (' + cond + ') {');
      parts.push(indent2(bodyCode));
      parts.push('}');
    }

    /*
     * Compiles |CALL|: calls the function constant with the referenced stack
     * slots as parameters, drops |bc[ip + 2]| slots and pushes the result.
     */
    function compileCall() {
      var baseLength = 4,
          paramsLength = bc[ip + baseLength - 1];

      var value = c(bc[ip + 1]) + '('
            + utils.map(
                bc.slice(ip + baseLength, ip + baseLength + paramsLength),
                stackIndex
              ).join(',')
            + ')';

      stack.pop(bc[ip + 2]);
      parts.push(stack.push(value));
      ip += baseLength + paramsLength;
    }

    /*
     * Extracted into a function just to silence JSHint complaining about
     * creating functions in a loop.
     */
    function stackIndex(p) {
      return stack.index(p);
    }

    while (ip < end) {
      switch (bc[ip]) {
        case op.PUSH:             // PUSH c
          /*
           * Hack: One of the constants can be an empty array. It needs to be
           * handled specially because it can be modified later on the stack
           * by |APPEND|.
           */
          parts.push(
            stack.push(ast.consts[bc[ip + 1]] === "[]" ? "[]" : c(bc[ip + 1]))
          );
          ip += 2;
          break;

        case op.PUSH_CURR_POS:    // PUSH_CURR_POS
          parts.push(stack.push('peg$currPos'));
          ip++;
          break;

        case op.POP:              // POP
          stack.pop();
          ip++;
          break;

        case op.POP_CURR_POS:     // POP_CURR_POS
          parts.push('peg$currPos = ' + stack.pop() + ';');
          ip++;
          break;

        case op.POP_N:            // POP_N n
          stack.pop(bc[ip + 1]);
          ip += 2;
          break;

        case op.NIP:              // NIP
          value = stack.pop();
          stack.pop();
          parts.push(stack.push(value));
          ip++;
          break;

        case op.NIP_CURR_POS:     // NIP_CURR_POS
          value = stack.pop();
          parts.push('peg$currPos = ' + stack.pop() + ';');
          parts.push(stack.push(value));
          ip++;
          break;

        case op.APPEND:           // APPEND
          value = stack.pop();
          parts.push(stack.top() + '.push(' + value + ');');
          ip++;
          break;

        case op.WRAP:             // WRAP n
          parts.push(
            stack.push('[' + stack.pop(bc[ip + 1]).join(', ') + ']')
          );
          ip += 2;
          break;

        case op.TEXT:             // TEXT
          stack.pop();
          parts.push(
            stack.push('input.substring(' + stack.top() + ', peg$currPos)')
          );
          ip++;
          break;

        case op.IF:               // IF t, f
          compileCondition(stack.top(), 0);
          break;

        case op.IF_ERROR:         // IF_ERROR t, f
          compileCondition(stack.top() + ' === null', 0);
          break;

        case op.IF_NOT_ERROR:     // IF_NOT_ERROR t, f
          compileCondition(stack.top() + ' !== null', 0);
          break;

        case op.WHILE_NOT_ERROR:  // WHILE_NOT_ERROR b
          compileLoop(stack.top() + ' !== null');
          break;

        case op.MATCH_ANY:        // MATCH_ANY a, f, ...
          compileCondition('input.length > peg$currPos', 0);
          break;

        case op.MATCH_STRING:     // MATCH_STRING s, a, f, ...
          str = constValue(bc[ip + 1]);
          compileCondition(
            str.length > 1
              ? 'input.substr(peg$currPos, '
                  + str.length
                  + ') === '
                  + c(bc[ip + 1])
              : 'input.charCodeAt(peg$currPos) === '
                  + str.charCodeAt(0),
            1
          );
          break;

        case op.MATCH_STRING_IC:  // MATCH_STRING_IC s, a, f, ...
          /*
           * The length must be that of the string value, not of its quoted
           * source text in |ast.consts| (which includes the quotes and any
           * escape sequences).
           */
          compileCondition(
            'input.substr(peg$currPos, '
              + constValue(bc[ip + 1]).length
              + ').toLowerCase() === '
              + c(bc[ip + 1]),
            1
          );
          break;

        case op.MATCH_REGEXP:     // MATCH_REGEXP r, a, f, ...
          compileCondition(
            c(bc[ip + 1]) + '.test(input.charAt(peg$currPos))',
            1
          );
          break;

        case op.ACCEPT_N:         // ACCEPT_N n
          parts.push(stack.push(
            bc[ip + 1] > 1
              ? 'input.substr(peg$currPos, ' + bc[ip + 1] + ')'
              : 'input.charAt(peg$currPos)'
          ));
          parts.push(
            bc[ip + 1] > 1
              ? 'peg$currPos += ' + bc[ip + 1] + ';'
              : 'peg$currPos++;'
          );
          ip += 2;
          break;

        case op.ACCEPT_STRING:    // ACCEPT_STRING s
          str = constValue(bc[ip + 1]);
          parts.push(stack.push(c(bc[ip + 1])));
          parts.push(
            str.length > 1
              ? 'peg$currPos += ' + str.length + ';'
              : 'peg$currPos++;'
          );
          ip += 2;
          break;

        case op.FAIL:             // FAIL e
          parts.push(stack.push('null'));
          parts.push('if (peg$silentFails === 0) { peg$fail(' + c(bc[ip + 1]) + '); }');
          ip += 2;
          break;

        case op.REPORT_SAVED_POS: // REPORT_SAVED_POS p
          parts.push('peg$reportedPos = ' + stack.index(bc[ip + 1]) + ';');
          ip += 2;
          break;

        case op.REPORT_CURR_POS:  // REPORT_CURR_POS
          parts.push('peg$reportedPos = peg$currPos;');
          ip++;
          break;

        case op.CALL:             // CALL f, n, pc, p1, p2, ..., pN
          compileCall();
          break;

        case op.RULE:             // RULE r
          parts.push(stack.push("peg$parse" + ast.rules[bc[ip + 1]].name + "()"));
          ip += 2;
          break;

        case op.SILENT_FAILS_ON:  // SILENT_FAILS_ON
          parts.push('peg$silentFails++;');
          ip++;
          break;

        case op.SILENT_FAILS_OFF: // SILENT_FAILS_OFF
          parts.push('peg$silentFails--;');
          ip++;
          break;

        default:
          throw new Error("Invalid opcode: " + bc[ip] + ".");
      }
    }

    return parts.join('\n');
  }

  /* The body is compiled first so that |stack.maxSp| is known below. */
  code = compile(rule.bytecode);

  parts.push([
    'function peg$parse' + rule.name + '() {',
    ' var ' + utils.map(utils.range(0, stack.maxSp + 1), s).join(', ') + ';',
    ''
  ].join('\n'));

  if (options.cache) {
    parts.push(indent2(
      generateCacheHeader(utils.indexOfRuleByName(ast, rule.name))
    ));
  }

  parts.push(indent2(code));

  if (options.cache) {
    parts.push(indent2(generateCacheFooter('s0')));
  }

  parts.push([
    '',
    ' return s0;',
    '}'
  ].join('\n'));

  return parts.join('\n');
}
var parts = [],
startRuleIndices, startRuleIndex,
startRuleFunctions, startRuleFunction;
parts.push([
'(function() {',
' /*',
' * Generated by PEG.js 0.7.0.',
' *',
' * http://pegjs.majda.cz/',
' */',
'',
' function subclass(child, parent) {',
' function ctor() { this.constructor = child; }',
' ctor.prototype = parent.prototype;',
' child.prototype = new ctor();',
' }',
'',
' function SyntaxError(expected, found, offset, line, column) {',
' function buildMessage(expected, found) {',
' function stringEscape(s) {',
' function hex(ch) { return ch.charCodeAt(0).toString(16).toUpperCase(); }',
'',
/*
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
* literal except for the closing quote character, backslash, carriage
* return, line separator, paragraph separator, and line feed. Any character
* may appear in the form of an escape sequence.
*
* For portability, we also escape all control and non-ASCII
* characters. Note that "\0" and "\v" escape sequences are not used because
* JSHint does not like the first and IE the second.
*/
' return s',
' .replace(/\\\\/g, \'\\\\\\\\\')', // backslash
' .replace(/"/g, \'\\\\"\')', // closing double quote
' .replace(/\\x08/g, \'\\\\b\')', // backspace
' .replace(/\\t/g, \'\\\\t\')', // horizontal tab
' .replace(/\\n/g, \'\\\\n\')', // line feed
' .replace(/\\f/g, \'\\\\f\')', // form feed
' .replace(/\\r/g, \'\\\\r\')', // carriage return
' .replace(/[\\x00-\\x07\\x0B\\x0E\\x0F]/g, function(ch) { return \'\\\\x0\' + hex(ch); })',
' .replace(/[\\x10-\\x1F\\x80-\\xFF]/g, function(ch) { return \'\\\\x\' + hex(ch); })',
' .replace(/[\\u0180-\\u0FFF]/g, function(ch) { return \'\\\\u0\' + hex(ch); })',
' .replace(/[\\u1080-\\uFFFF]/g, function(ch) { return \'\\\\u\' + hex(ch); });',
' }',
'',
' var expectedDesc, foundDesc;',
'',
' switch (expected.length) {',
' case 0:',
' expectedDesc = "end of input";',
' break;',
'',
' case 1:',
' expectedDesc = expected[0];',
' break;',
'',
' default:',
' expectedDesc = expected.slice(0, -1).join(", ")',
' + " or "',
' + expected[expected.length - 1];',
' }',
'',
' foundDesc = found ? "\\"" + stringEscape(found) + "\\"" : "end of input";',
'',
' return "Expected " + expectedDesc + " but " + foundDesc + " found.";',
' }',
'',
' this.expected = expected;',
' this.found = found;',
' this.offset = offset;',
' this.line = line;',
' this.column = column;',
'',
' this.name = "SyntaxError";',
' this.message = buildMessage(expected, found);',
' }',
'',
' subclass(SyntaxError, Error);',
'',
' function parse(input) {',
' var options = arguments.length > 1 ? arguments[1] : {},',
''
].join('\n'));
if (options.optimize === "size") {
startRuleIndices = '{ '
+ utils.map(
options.allowedStartRules,
function(r) { return r + ': ' + utils.indexOfRuleByName(ast, r); }
).join(', ')
+ ' }';
startRuleIndex = utils.indexOfRuleByName(ast, options.allowedStartRules[0]);
parts.push([
' peg$startRuleIndices = ' + startRuleIndices + ',',
' peg$startRuleIndex = ' + startRuleIndex + ','
].join('\n'));
} else {
startRuleFunctions = '{ '
+ utils.map(
options.allowedStartRules,
function(r) { return r + ': peg$parse' + r; }
).join(', ')
+ ' }';
startRuleFunction = 'peg$parse' + options.allowedStartRules[0];
parts.push([
' peg$startRuleFunctions = ' + startRuleFunctions + ',',
' peg$startRuleFunction = ' + startRuleFunction + ','
].join('\n'));
}
parts.push('');
parts.push(indent8(generateTables()));
parts.push([
'',
' peg$currPos = 0,',
' peg$reportedPos = 0,',
' peg$cachedPos = 0,',
' peg$cachedPosDetails = { line: 1, column: 1, seenCR: false },',
' peg$maxFailPos = 0,',
' peg$maxFailExpected = [],',
' peg$silentFails = 0,', // 0 = report failures, > 0 = silence failures
''
].join('\n'));
if (options.cache) {
parts.push(' peg$cache = {},');
}
parts.push([
' peg$result;',
''
].join('\n'));
if (options.optimize === "size") {
parts.push([
' if ("startRule" in options) {',
' if (!(options.startRule in peg$startRuleIndices)) {',
' throw new Error("Can\'t start parsing from rule \\"" + options.startRule + "\\".");',
' }',
'',
' peg$startRuleIndex = peg$startRuleIndices[options.startRule];',
' }'
].join('\n'));
} else {
parts.push([
' if ("startRule" in options) {',
' if (!(options.startRule in peg$startRuleFunctions)) {',
' throw new Error("Can\'t start parsing from rule \\"" + options.startRule + "\\".");',
' }',
'',
' peg$startRuleFunction = peg$startRuleFunctions[options.startRule];',
' }'
].join('\n'));
}
parts.push([
'',
' function text() {',
' return input.substring(peg$reportedPos, peg$currPos);',
' }',
'',
' function offset() {',
' return peg$reportedPos;',
' }',
'',
' function line() {',
' return peg$computePosDetails(peg$reportedPos).line;',
' }',
'',
' function column() {',
' return peg$computePosDetails(peg$reportedPos).column;',
' }',
'',
' function peg$computePosDetails(pos) {',
' function advance(details, pos) {',
' var p, ch;',
'',
' for (p = 0; p < pos; p++) {',
' ch = input.charAt(p);',
' if (ch === "\\n") {',
' if (!details.seenCR) { details.line++; }',
' details.column = 1;',
' details.seenCR = false;',
' } else if (ch === "\\r" || ch === "\\u2028" || ch === "\\u2029") {',
' details.line++;',
' details.column = 1;',
' details.seenCR = true;',
' } else {',
' details.column++;',
' details.seenCR = false;',
' }',
' }',
' }',
'',
' if (peg$cachedPos !== pos) {',
' if (peg$cachedPos > pos) {',
' peg$cachedPos = 0;',
' peg$cachedPosDetails = { line: 1, column: 1, seenCR: false };',
' }',
' peg$cachedPos = pos;',
' advance(peg$cachedPosDetails, peg$cachedPos);',
' }',
'',
' return peg$cachedPosDetails;',
' }',
'',
' function peg$fail(expected) {',
' if (peg$currPos < peg$maxFailPos) { return; }',
'',
' if (peg$currPos > peg$maxFailPos) {',
' peg$maxFailPos = peg$currPos;',
' peg$maxFailExpected = [];',
' }',
'',
' peg$maxFailExpected.push(expected);',
' }',
'',
' function peg$cleanupExpected(expected) {',
' var i;',
'',
' expected.sort();',
'',
' for (i = 1; i < expected.length; i++) {',
' if (expected[i - 1] === expected[i]) {',
' expected.splice(i, 1);',
' }',
' }',
' }',
''
].join('\n'));
if (options.optimize === "size") {
parts.push(indent4(generateInterpreter()));
parts.push('');
} else {
utils.each(ast.rules, function(rule) {
parts.push(indent4(generateRuleFunction(rule)));
parts.push('');
});
}
if (ast.initializer) {
parts.push(indent4(ast.initializer.code));
parts.push('');
}
if (options.optimize === "size") {
parts.push(' peg$result = peg$parseRule(peg$startRuleIndex);');
} else {
parts.push(' peg$result = peg$startRuleFunction();');
}
parts.push([
'',
' if (peg$result !== null && peg$currPos === input.length) {',
' return peg$result;',
' } else {',
' peg$cleanupExpected(peg$maxFailExpected);',
' peg$reportedPos = Math.max(peg$currPos, peg$maxFailPos);',
'',
' throw new SyntaxError(',
' peg$maxFailExpected,',
' peg$reportedPos < input.length ? input.charAt(peg$reportedPos) : null,',
' peg$reportedPos,',
' peg$computePosDetails(peg$reportedPos).line,',
' peg$computePosDetails(peg$reportedPos).column',
' );',
' }',
' }',
'',
' return {',
' SyntaxError: SyntaxError,',
' parse : parse',
' };',
'})()'
].join('\n'));
ast.code = parts.join('\n');
};

File diff suppressed because it is too large Load Diff

@ -22,6 +22,16 @@ var utils = {
}
},
indexOf: function(array, callback) {
var length = array.length;
for (var i = 0; i < length; i++) {
if (callback(array[i])) {
return i;
}
}
return -1;
},
contains: function(array, value) {
/*
* Stupid IE does not have Array.prototype.indexOf, otherwise this function
@ -209,6 +219,10 @@ var utils = {
findRuleByName: function(ast, name) {
return utils.find(ast.rules, function(r) { return r.name === name; });
},
indexOfRuleByName: function(ast, name) {
return utils.indexOf(ast.rules, function(r) { return r.name === name; });
}
};

@ -19,9 +19,10 @@
"examples/javascript.pegjs",
"examples/json.pegjs",
"lib/compiler.js",
"lib/compiler/opcodes.js",
"lib/compiler/passes.js",
"lib/compiler/passes/allocate-registers.js",
"lib/compiler/passes/generate-code.js",
"lib/compiler/passes/generate-bytecode.js",
"lib/compiler/passes/generate-javascript.js",
"lib/compiler/passes/remove-proxy-rules.js",
"lib/compiler/passes/report-left-recursion.js",
"lib/compiler/passes/report-missing-rules.js",

@ -1,309 +0,0 @@
/*
 * Specs for the |allocateRegisters| compiler pass. Every spec runs the pass on
 * a small grammar and compares the resulting AST with the expected details
 * using the |toChangeAST| matcher.
 */
describe("compiler pass |allocateRegisters|", function() {
  var pass = PEG.compiler.passes.allocateRegisters;

  /* Asserts that the pass turns |grammar| into an AST matching |details|. */
  function check(grammar, details) {
    expect(pass).toChangeAST(grammar, details);
  }

  /* Expected AST whose only rule carries |details|. */
  function inRule(details) {
    return { rules: [details] };
  }

  /* Expected AST whose only rule's expression carries |details|. */
  function inExpression(details) {
    return inRule({ expression: details });
  }

  /* Expected AST whose only rule's expression's expression carries |details|. */
  function inInnerExpression(details) {
    return inExpression({ expression: details });
  }

  /* Fixtures shared by several specs below. */
  var reuseResult = inInnerExpression({ resultIndex: 0 });
  var allocResult = inInnerExpression({ resultIndex: 1 });
  var savePos = inExpression({ posIndex: 1 });
  var scoped = inExpression({ params: {} });

  var blocked = inExpression({
    elements: [
      {},
      {
        resultIndex: 3,
        posIndex: 5,
        elements: [{ resultIndex: 6 }, { resultIndex: 7 }]
      }
    ]
  });

  var unblocked = inExpression({
    elements: [
      {},
      {
        resultIndex: 3,
        posIndex: 4,
        elements: [{ resultIndex: 5 }, { resultIndex: 6 }]
      }
    ]
  });

  describe("for rule", function() {
    it("allocates a new result register for the expression", function() {
      check('start = "a"', inExpression({ resultIndex: 0 }));
    });

    it("counts used registers", function() {
      check('start = "a"', inRule({ registerCount: 1 }));
      check('start = "a"*', inRule({ registerCount: 2 }));
      check('start = ("a"*)*', inRule({ registerCount: 3 }));
    });

    it("resets used registers counter", function() {
      check('a = "a"*; b = "b"', {
        rules: [{ registerCount: 2 }, { registerCount: 1 }]
      });
    });
  });

  describe("for named", function() {
    it("reuses its own result register for the expression", function() {
      check('start "start" = "a"', reuseResult);
    });
  });

  describe("for choice", function() {
    it("reuses its own result register for the alternatives", function() {
      check('start = "a" / "b" / "c"', inExpression({
        alternatives: [
          { resultIndex: 0 },
          { resultIndex: 0 },
          { resultIndex: 0 }
        ]
      }));
    });

    it("creates a new scope", function() {
      check('start = (a:"a" / "b" / "c") { }', scoped);
    });

    it("unblocks registers blocked by its children", function() {
      check('start = ((a:"a" / "b" / "c") "d") ("e" "f")', unblocked);
    });
  });

  describe("for action", function() {
    it("allocates a position register", function() {
      check('start = "a" { }', savePos);
    });

    it("reuses its own result register for the expression", function() {
      check('start = "a" { }', reuseResult);
    });

    it("creates a new scope", function() {
      check('start = (a:"a" { }) { }', scoped);
    });

    it("unblocks registers blocked by its children", function() {
      check('start = ((a:"a" { }) "b") ("c" "d")', unblocked);
    });

    it("computes params", function() {
      check('start = a:"a" b:"b" c:"c" { }', inExpression({
        params: { a: 3, b: 4, c: 5 }
      }));
    });
  });

  describe("for sequence", function() {
    it("allocates a position register", function() {
      check('start = ', savePos);
      check('start = "a" "b" "c"', savePos);
    });

    it("allocates new result registers for the elements", function() {
      check('start = "a" "b" "c"', inExpression({
        elements: [{ resultIndex: 2 }, { resultIndex: 3 }, { resultIndex: 4 }]
      }));
    });

    it("does not create a new scope", function() {
      check(
        'start = a:"a" b:"b" c:"c" { }',
        inExpression({ params: { a: 3, b: 4, c: 5 } })
      );
    });

    it("does not unblock blocked result registers from children", function() {
      check('start = (a:"a" "b") ("c" "d")', blocked);
    });
  });

  describe("for labeled", function() {
    it("reuses its own result register for the expression", function() {
      check('start = a:"a"', reuseResult);
    });

    it("creates a new scope", function() {
      check('start = a:(b:"b") { }', inExpression({
        params: { a: 0 }
      }));
    });

    it("unblocks registers blocked by its children", function() {
      check('start = (a:(b:"b") "c") ("d" "e")', blocked);
    });

    it("adds label to the environment", function() {
      check('start = a:"a" { }', inExpression({
        params: { a: 0 }
      }));
    });

    it("blocks its own result register", function() {
      check('start = (a:"a" "b") ("c" "d")', blocked);
    });
  });

  describe("for text", function() {
    it("allocates a position register", function() {
      check('start = $"a"', savePos);
    });

    it("reuses its own result register for the expression", function() {
      check('start = $"a"', reuseResult);
    });

    it("creates a new scope", function() {
      check('start = $(a:"a") { }', scoped);
    });

    it("unblocks registers blocked by its children", function() {
      check('start = ($(a:"a") "b") ("c" "d")', unblocked);
    });
  });

  describe("for simple and", function() {
    it("allocates a position register", function() {
      check('start = &"a"', savePos);
    });

    it("reuses its own result register for the expression", function() {
      check('start = &"a"', reuseResult);
    });

    it("creates a new scope", function() {
      check('start = &(a:"a") { }', scoped);
    });

    it("unblocks registers blocked by its children", function() {
      check('start = (&(a:"a") "b") ("c" "d")', unblocked);
    });
  });

  describe("for simple not", function() {
    it("allocates a position register", function() {
      check('start = !"a"', savePos);
    });

    it("reuses its own result register for the expression", function() {
      check('start = !"a"', reuseResult);
    });

    it("creates a new scope", function() {
      check('start = !(a:"a") { }', scoped);
    });

    it("unblocks registers blocked by its children", function() {
      check('start = (!(a:"a") "b") ("c" "d")', unblocked);
    });
  });

  describe("for semantic and", function() {
    it("computes params", function() {
      check('start = a:"a" b:"b" c:"c" &{ }', inExpression({
        elements: [{}, {}, {}, { params: { a: 2, b: 3, c: 4 } }]
      }));
    });
  });

  describe("for semantic not", function() {
    it("computes params", function() {
      check('start = a:"a" b:"b" c:"c" !{ }', inExpression({
        elements: [{}, {}, {}, { params: { a: 2, b: 3, c: 4 } }]
      }));
    });
  });

  describe("for optional", function() {
    it("reuses its own result register for the expression", function() {
      check('start = "a"?', reuseResult);
    });

    it("creates a new scope", function() {
      check('start = (a:"a")? { }', scoped);
    });

    it("unblocks registers blocked by its children", function() {
      check('start = ((a:"a")? "b") ("c" "d")', unblocked);
    });
  });

  describe("for zero or more", function() {
    it("allocates a new result register for the expression", function() {
      check('start = "a"*', allocResult);
    });

    it("creates a new scope", function() {
      check('start = (a:"a")* { }', scoped);
    });

    it("unblocks registers blocked by its children", function() {
      check('start = ((a:"a")* "b") ("c" "d")', unblocked);
    });
  });

  describe("for one or more", function() {
    it("allocates a new result register for the expression", function() {
      check('start = "a"+', allocResult);
    });

    it("creates a new scope", function() {
      check('start = (a:"a")+ { }', scoped);
    });

    it("unblocks registers blocked by its children", function() {
      check('start = ((a:"a")+ "b") ("c" "d")', unblocked);
    });
  });
});

@ -0,0 +1,674 @@
/*
 * Specs for the |generateBytecode| compiler pass. Every spec runs the pass on
 * a small grammar and compares either the bytecode of its rules or the
 * grammar-wide constant table with the expected value, using the |toChangeAST|
 * matcher.
 *
 * In the bytecode listings, each line is annotated with the opcode it encodes
 * (or the AST node that produced it); a "*" marks the first instruction of a
 * branch or loop body.
 */
describe("compiler pass |generateBytecode|", function() {
  var pass = PEG.compiler.passes.generateBytecode;

  /* Asserts that the pass turns |grammar| into an AST matching |details|. */
  function expectAST(grammar, details) {
    expect(pass).toChangeAST(grammar, details);
  }

  /* Asserts the bytecode generated for the only rule of |grammar|. */
  function expectBytecode(grammar, bytecode) {
    expectAST(grammar, { rules: [{ bytecode: bytecode }] });
  }

  /* Asserts the constant table generated for |grammar|. */
  function expectConsts(grammar, consts) {
    expectAST(grammar, { consts: consts });
  }

  describe("for grammar", function() {
    var source = [
      'a = "a"',
      'b = "b"',
      'c = "c"'
    ].join("\n");

    it("generates correct bytecode", function() {
      expectAST(source, {
        rules: [
          { bytecode: [15, 0, 2, 2, 19, 0, 20, 1] },
          { bytecode: [15, 2, 2, 2, 19, 2, 20, 3] },
          { bytecode: [15, 4, 2, 2, 19, 4, 20, 5] }
        ]
      });
    });

    it("defines correct constants", function() {
      expectConsts(source, [
        '"a"',
        '"\\"a\\""',
        '"b"',
        '"\\"b\\""',
        '"c"',
        '"\\"c\\""'
      ]);
    });
  });

  describe("for rule", function() {
    it("generates correct bytecode", function() {
      expectBytecode('start = "a"', [
        15, 0, 2, 2, 19, 0, 20, 1   // <expression>
      ]);
    });
  });

  describe("for named", function() {
    var source = 'start "start" = "a"';

    it("generates correct bytecode", function() {
      expectBytecode(source, [
        25,                          // SILENT_FAILS_ON
        15, 1, 2, 2, 19, 1, 20, 2,   // <expression>
        26,                          // SILENT_FAILS_OFF
        11, 2, 0,                    // IF_ERROR
        20, 0                        // FAIL
      ]);
    });

    it("defines correct constants", function() {
      expectConsts(source, ['"start"', '"a"', '"\\"a\\""']);
    });
  });

  describe("for choice", function() {
    it("generates correct bytecode", function() {
      expectBytecode('start = "a" / "b" / "c"', [
        15, 0, 2, 2, 19, 0, 20, 1,   // <alternatives[0]>
        11, 21, 0,                   // IF_ERROR
        2,                           //   * POP
        15, 2, 2, 2, 19, 2, 20, 3,   //     <alternatives[1]>
        11, 9, 0,                    //     IF_ERROR
        2,                           //       * POP
        15, 4, 2, 2, 19, 4, 20, 5    //         <alternatives[2]>
      ]);
    });
  });

  describe("for action", function() {
    describe("without labels", function() {
      var source = 'start = { code }';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          1,                           // PUSH_CURR_POS
          0, 0,                        // PUSH
          12, 6, 0,                    // IF_NOT_ERROR
          21, 1,                       //   * REPORT_SAVED_POS
          23, 1, 1, 0,                 //     CALL
          11, 1, 1,                    // IF_ERROR
          6,                           //   * NIP_CURR_POS
          5                            //   * NIP
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, ['[]', 'function() { code }']);
      });
    });

    describe("with one label", function() {
      var source = 'start = a:"a" { code }';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          1,                           // PUSH_CURR_POS
          15, 0, 2, 2, 19, 0, 20, 1,   // <expression>
          12, 7, 0,                    // IF_NOT_ERROR
          21, 1,                       //   * REPORT_SAVED_POS
          23, 2, 1, 1, 0,              //     CALL
          11, 1, 1,                    // IF_ERROR
          6,                           //   * NIP_CURR_POS
          5                            //   * NIP
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, ['"a"', '"\\"a\\""', 'function(a) { code }']);
      });
    });

    describe("with multiple labels", function() {
      var source = 'start = a:"a" b:"b" c:"c" { code }';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          1,                           // PUSH_CURR_POS
          15, 1, 2, 2, 19, 1, 20, 2,   // <elements[0]>
          12, 46, 4,                   // IF_NOT_ERROR
          15, 3, 2, 2, 19, 3, 20, 4,   //   * <elements[1]>
          12, 30, 5,                   //     IF_NOT_ERROR
          15, 5, 2, 2, 19, 5, 20, 6,   //       * <elements[2]>
          12, 14, 5,                   //         IF_NOT_ERROR
          21, 3,                       //           * REPORT_SAVED_POS
          23, 7, 3, 3, 2, 1, 0,        //             CALL
          11, 1, 1,                    //             IF_ERROR
          6,                           //               * NIP_CURR_POS
          5,                           //               * NIP
          4, 3,                        //           * POP_N
          3,                           //             POP_CURR_POS
          0, 0,                        //             PUSH
          4, 2,                        //       * POP_N
          3,                           //         POP_CURR_POS
          0, 0,                        //         PUSH
          2,                           //   * POP
          3,                           //     POP_CURR_POS
          0, 0                         //     PUSH
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, [
          'null',
          '"a"',
          '"\\"a\\""',
          '"b"',
          '"\\"b\\""',
          '"c"',
          '"\\"c\\""',
          'function(a, b, c) { code }'
        ]);
      });
    });
  });

  describe("for sequence", function() {
    describe("empty", function() {
      var source = 'start = ';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          0, 0   // PUSH
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, ['[]']);
      });
    });

    describe("non-empty", function() {
      var source = 'start = "a" "b" "c"';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          1,                           // PUSH_CURR_POS
          15, 1, 2, 2, 19, 1, 20, 2,   // <elements[0]>
          12, 35, 4,                   // IF_NOT_ERROR
          15, 3, 2, 2, 19, 3, 20, 4,   //   * <elements[1]>
          12, 19, 5,                   //     IF_NOT_ERROR
          15, 5, 2, 2, 19, 5, 20, 6,   //       * <elements[2]>
          12, 3, 5,                    //         IF_NOT_ERROR
          8, 3,                        //           * WRAP
          5,                           //             NIP
          4, 3,                        //           * POP_N
          3,                           //             POP_CURR_POS
          0, 0,                        //             PUSH
          4, 2,                        //       * POP_N
          3,                           //         POP_CURR_POS
          0, 0,                        //         PUSH
          2,                           //   * POP
          3,                           //     POP_CURR_POS
          0, 0                         //     PUSH
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, [
          'null',
          '"a"',
          '"\\"a\\""',
          '"b"',
          '"\\"b\\""',
          '"c"',
          '"\\"c\\""'
        ]);
      });
    });
  });

  describe("for labeled", function() {
    it("generates correct bytecode", function() {
      expectBytecode('start = a:"a"', [
        15, 0, 2, 2, 19, 0, 20, 1   // <expression>
      ]);
    });
  });

  describe("for text", function() {
    it("generates correct bytecode", function() {
      expectBytecode('start = $"a"', [
        1,                           // PUSH_CURR_POS
        15, 0, 2, 2, 19, 0, 20, 1,   // <expression>
        12, 1, 0,                    // IF_NOT_ERROR
        9,                           //   * TEXT
        5                            // NIP
      ]);
    });
  });

  describe("for simple and", function() {
    var source = 'start = &"a"';

    it("generates correct bytecode", function() {
      expectBytecode(source, [
        1,                           // PUSH_CURR_POS
        25,                          // SILENT_FAILS_ON
        15, 2, 2, 2, 19, 2, 20, 3,   // <expression>
        26,                          // SILENT_FAILS_OFF
        12, 4, 4,                    // IF_NOT_ERROR
        2,                           //   * POP
        3,                           //     POP_CURR_POS
        0, 0,                        //     PUSH
        2,                           //   * POP
        2,                           //     POP
        0, 1                         //     PUSH
      ]);
    });

    it("defines correct constants", function() {
      expectConsts(source, ['""', 'null', '"a"', '"\\"a\\""']);
    });
  });

  describe("for simple not", function() {
    var source = 'start = !"a"';

    it("generates correct bytecode", function() {
      expectBytecode(source, [
        1,                           // PUSH_CURR_POS
        25,                          // SILENT_FAILS_ON
        15, 2, 2, 2, 19, 2, 20, 3,   // <expression>
        26,                          // SILENT_FAILS_OFF
        11, 4, 4,                    // IF_ERROR
        2,                           //   * POP
        2,                           //     POP
        0, 0,                        //     PUSH
        2,                           //   * POP
        3,                           //     POP_CURR_POS
        0, 1                         //     PUSH
      ]);
    });

    it("defines correct constants", function() {
      expectConsts(source, ['""', 'null', '"a"', '"\\"a\\""']);
    });
  });

  describe("for semantic and", function() {
    describe("without labels", function() {
      var source = 'start = &{ code }';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          22,            // REPORT_CURR_POS
          23, 0, 0, 0,   // CALL
          10, 3, 3,      // IF
          2,             //   * POP
          0, 1,          //     PUSH
          2,             //   * POP
          0, 2           //     PUSH
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, ['function() { code }', '""', 'null']);
      });
    });

    describe("with labels", function() {
      var source = 'start = a:"a" b:"b" c:"c" &{ code }';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          1,                           // PUSH_CURR_POS
          15, 1, 2, 2, 19, 1, 20, 2,   // <elements[0]>
          12, 60, 4,                   // IF_NOT_ERROR
          15, 3, 2, 2, 19, 3, 20, 4,   //   * <elements[1]>
          12, 44, 5,                   //     IF_NOT_ERROR
          15, 5, 2, 2, 19, 5, 20, 6,   //       * <elements[2]>
          12, 28, 5,                   //         IF_NOT_ERROR
          22,                          //           * REPORT_CURR_POS
          23, 7, 0, 3, 2, 1, 0,        //             CALL
          10, 3, 3,                    //             IF
          2,                           //               * POP
          0, 8,                        //                 PUSH
          2,                           //               * POP
          0, 0,                        //                 PUSH
          12, 3, 5,                    //             IF_NOT_ERROR
          8, 4,                        //               * WRAP
          5,                           //                 NIP
          4, 4,                        //               * POP_N
          3,                           //                 POP_CURR_POS
          0, 0,                        //                 PUSH
          4, 3,                        //           * POP_N
          3,                           //             POP_CURR_POS
          0, 0,                        //             PUSH
          4, 2,                        //       * POP_N
          3,                           //         POP_CURR_POS
          0, 0,                        //         PUSH
          2,                           //   * POP
          3,                           //     POP_CURR_POS
          0, 0                         //     PUSH
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, [
          'null',
          '"a"',
          '"\\"a\\""',
          '"b"',
          '"\\"b\\""',
          '"c"',
          '"\\"c\\""',
          'function(a, b, c) { code }',
          '""'
        ]);
      });
    });
  });

  describe("for semantic not", function() {
    describe("without labels", function() {
      var source = 'start = !{ code }';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          22,            // REPORT_CURR_POS
          23, 0, 0, 0,   // CALL
          10, 3, 3,      // IF
          2,             //   * POP
          0, 2,          //     PUSH
          2,             //   * POP
          0, 1           //     PUSH
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, ['function() { code }', '""', 'null']);
      });
    });

    describe("with labels", function() {
      var source = 'start = a:"a" b:"b" c:"c" !{ code }';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          1,                           // PUSH_CURR_POS
          15, 1, 2, 2, 19, 1, 20, 2,   // <elements[0]>
          12, 60, 4,                   // IF_NOT_ERROR
          15, 3, 2, 2, 19, 3, 20, 4,   //   * <elements[1]>
          12, 44, 5,                   //     IF_NOT_ERROR
          15, 5, 2, 2, 19, 5, 20, 6,   //       * <elements[2]>
          12, 28, 5,                   //         IF_NOT_ERROR
          22,                          //           * REPORT_CURR_POS
          23, 7, 0, 3, 2, 1, 0,        //             CALL
          10, 3, 3,                    //             IF
          2,                           //               * POP
          0, 0,                        //                 PUSH
          2,                           //               * POP
          0, 8,                        //                 PUSH
          12, 3, 5,                    //             IF_NOT_ERROR
          8, 4,                        //               * WRAP
          5,                           //                 NIP
          4, 4,                        //               * POP_N
          3,                           //                 POP_CURR_POS
          0, 0,                        //                 PUSH
          4, 3,                        //           * POP_N
          3,                           //             POP_CURR_POS
          0, 0,                        //             PUSH
          4, 2,                        //       * POP_N
          3,                           //         POP_CURR_POS
          0, 0,                        //         PUSH
          2,                           //   * POP
          3,                           //     POP_CURR_POS
          0, 0                         //     PUSH
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, [
          'null',
          '"a"',
          '"\\"a\\""',
          '"b"',
          '"\\"b\\""',
          '"c"',
          '"\\"c\\""',
          'function(a, b, c) { code }',
          '""'
        ]);
      });
    });
  });

  describe("for optional", function() {
    var source = 'start = "a"?';

    it("generates correct bytecode", function() {
      expectBytecode(source, [
        15, 1, 2, 2, 19, 1, 20, 2,   // <expression>
        11, 3, 0,                    // IF_ERROR
        2,                           //   * POP
        0, 0                         //     PUSH
      ]);
    });

    it("defines correct constants", function() {
      expectConsts(source, ['""', '"a"', '"\\"a\\""']);
    });
  });

  describe("for zero or more", function() {
    var source = 'start = "a"*';

    it("generates correct bytecode", function() {
      expectBytecode(source, [
        0, 0,                        // PUSH
        15, 1, 2, 2, 19, 1, 20, 2,   // <expression>
        13, 9,                       // WHILE_NOT_ERROR
        7,                           //   * APPEND
        15, 1, 2, 2, 19, 1, 20, 2,   //     <expression>
        2                            // POP
      ]);
    });

    it("defines correct constants", function() {
      expectConsts(source, ['[]', '"a"', '"\\"a\\""']);
    });
  });

  describe("for one or more", function() {
    var source = 'start = "a"+';

    it("generates correct bytecode", function() {
      expectBytecode(source, [
        0, 0,                        // PUSH
        15, 2, 2, 2, 19, 2, 20, 3,   // <expression>
        12, 12, 4,                   // IF_NOT_ERROR
        13, 9,                       //   * WHILE_NOT_ERROR
        7,                           //       * APPEND
        15, 2, 2, 2, 19, 2, 20, 3,   //         <expression>
        2,                           //     POP
        2,                           //   * POP
        2,                           //     POP
        0, 1                         //     PUSH
      ]);
    });

    it("defines correct constants", function() {
      expectConsts(source, ['[]', 'null', '"a"', '"\\"a\\""']);
    });
  });

  describe("for rule reference", function() {
    it("generates correct bytecode", function() {
      var source = [
        'start = other',
        'other = "other"'
      ].join("\n");

      expectAST(source, {
        rules: [
          {
            bytecode: [24, 1]   // RULE
          },
          { }
        ]
      });
    });
  });

  describe("for literal", function() {
    describe("empty", function() {
      var source = 'start = ""';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          0, 0   // PUSH
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, ['""']);
      });
    });

    describe("non-empty case-sensitive", function() {
      var source = 'start = "a"';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          15, 0, 2, 2,   // MATCH_STRING
          19, 0,         //   * ACCEPT_STRING
          20, 1          //   * FAIL
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, ['"a"', '"\\"a\\""']);
      });
    });

    describe("non-empty case-insensitive", function() {
      var source = 'start = "A"i';

      it("generates correct bytecode", function() {
        expectBytecode(source, [
          16, 0, 2, 2,   // MATCH_STRING_IC
          18, 1,         //   * ACCEPT_N
          20, 1          //   * FAIL
        ]);
      });

      it("defines correct constants", function() {
        expectConsts(source, ['"a"', '"\\"A\\""']);
      });
    });
  });

  describe("for class", function() {
    it("generates correct bytecode", function() {
      expectBytecode('start = [a]', [
        17, 0, 2, 2,   // MATCH_REGEXP
        18, 1,         //   * ACCEPT_N
        20, 1          //   * FAIL
      ]);
    });

    describe("non-empty non-inverted case-sensitive", function() {
      it("defines correct constants", function() {
        expectConsts('start = [a]', ['/^[a]/', '"[a]"']);
      });
    });

    describe("non-empty inverted case-sensitive", function() {
      it("defines correct constants", function() {
        expectConsts('start = [^a]', ['/^[^a]/', '"[^a]"']);
      });
    });

    describe("non-empty non-inverted case-insensitive", function() {
      it("defines correct constants", function() {
        expectConsts('start = [a]i', ['/^[a]/i', '"[a]i"']);
      });
    });

    describe("non-empty complex", function() {
      it("defines correct constants", function() {
        expectConsts(
          'start = [ab-def-hij-l]',
          ['/^[ab-def-hij-l]/', '"[ab-def-hij-l]"']
        );
      });
    });

    describe("empty non-inverted", function() {
      it("defines correct constants", function() {
        expectConsts('start = []', ['/^(?!)/', '"[]"']);
      });
    });

    describe("empty inverted", function() {
      it("defines correct constants", function() {
        expectConsts('start = [^]', ['/^[\\S\\s]/', '"[^]"']);
      });
    });
  });

  describe("for any", function() {
    var source = 'start = .';

    it("generates bytecode", function() {
      expectBytecode(source, [
        14, 2, 2,   // MATCH_ANY
        18, 1,      //   * ACCEPT_N
        20, 0       //   * FAIL
      ]);
    });

    it("defines correct constants", function() {
      expectConsts(source, ['"any character"']);
    });
  });
});

@ -1,6 +1,9 @@
describe("generated parser", function() {
function vary(names, block) {
var values = { cache: [false, true] };
var values = {
cache: [false, true],
optimize: ["speed", "size"]
};
function varyStep(names, options) {
var clonedOptions = {}, key, name, i;
@ -32,7 +35,7 @@ describe("generated parser", function() {
}
function varyAll(block) {
vary(["cache"], block);
vary(["cache", "optimize"], block);
}
beforeEach(function() {

@ -12,7 +12,7 @@
<script src="compiler/passes/report-missing-rules.spec.js"></script>
<script src="compiler/passes/report-left-recursion.spec.js"></script>
<script src="compiler/passes/remove-proxy-rules.spec.js"></script>
<script src="compiler/passes/allocate-registers.spec.js"></script>
<script src="compiler/passes/generate-bytecode.spec.js"></script>
<script>
(function() {
var env = jasmine.getEnv(),

Loading…
Cancel
Save