You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1124 lines
37 KiB
JavaScript
1124 lines
37 KiB
JavaScript
/* PEG.js compiler. */
|
|
|
|
(function() {
|
|
|
|
/* ===== PEG ===== */
|
|
|
|
/* no var */ PEG = {};
|
|
|
|
/*
|
|
* Generates a parser from a specified grammar and returns it.
|
|
*
|
|
* The grammar must be a string in the format described by the metagramar in the
|
|
* metagrammar.pegjs file.
|
|
*
|
|
* Throws |PEG.grammarParser.SyntaxError| if the grammar contains a syntax error
|
|
* or |PEG.GrammarError| if it contains a semantic error. Note that not all
|
|
* errors are detected during the generation and some may protrude to the
|
|
* generated parser and cause its malfunction.
|
|
*/
|
|
PEG.buildParser = function(grammar) {
|
|
return PEG.Compiler.compileParser(PEG.grammarParser.parse(grammar));
|
|
};
|
|
|
|
/* ===== PEG.GrammarError ===== */
|
|
|
|
/* Thrown when the grammar contains an error. */
|
|
|
|
PEG.GrammarError = function(message) {
|
|
this.name = "PEG.GrammarError";
|
|
this.message = message;
|
|
};
|
|
|
|
PEG.GrammarError.prototype = Error.prototype;
|
|
|
|
/* ===== PEG.ArrayUtils ===== */
|
|
|
|
/* Array manipulation utility functions. */
|
|
|
|
PEG.ArrayUtils = {
|
|
/* Like Python's |range|, but without |step|. */
|
|
range: function(start, stop) {
|
|
if (typeof(stop) === "undefined") {
|
|
stop = start;
|
|
start = 0;
|
|
}
|
|
|
|
var result = new Array(Math.max(0, stop - start));
|
|
for (var i = 0, j = start; j < stop; i++, j++) {
|
|
result[i] = j;
|
|
}
|
|
return result;
|
|
},
|
|
|
|
/*
|
|
* The code needs to be in sync with the code template in the compilation
|
|
* function for "action" nodes.
|
|
*/
|
|
contains: function(array, value) {
|
|
/*
|
|
* Stupid IE does not have Array.prototype.indexOf, otherwise this function
|
|
* would be a one-liner.
|
|
*/
|
|
var length = array.length;
|
|
for (var i = 0; i < length; i++) {
|
|
if (array[i] === value) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
},
|
|
|
|
each: function(array, callback) {
|
|
var length = array.length;
|
|
for (var i = 0; i < length; i++) {
|
|
callback(array[i]);
|
|
}
|
|
},
|
|
|
|
map: function(array, callback) {
|
|
var result = [];
|
|
var length = array.length;
|
|
for (var i = 0; i < length; i++) {
|
|
result[i] = callback(array[i]);
|
|
}
|
|
return result;
|
|
}
|
|
};
|
|
|
|
/* ===== PEG.StringUtils ===== */
|
|
|
|
/* String manipulation utility functions. */
|
|
|
|
PEG.StringUtils = {
|
|
/*
|
|
* Surrounds the string with quotes and escapes characters inside so that the
|
|
* result is a valid JavaScript string.
|
|
*
|
|
* The code needs to be in sync with th code template in the compilation
|
|
* function for "action" nodes.
|
|
*/
|
|
quote: function(s) {
|
|
/*
|
|
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
|
|
* literal except for the closing quote character, backslash, carriage
|
|
* return, line separator, paragraph separator, and line feed. Any character
|
|
* may appear in the form of an escape sequence.
|
|
*/
|
|
return '"' + s
|
|
.replace(/\\/g, '\\\\') // backslash
|
|
.replace(/"/g, '\\"') // closing quote character
|
|
.replace(/\r/g, '\\r') // carriage return
|
|
.replace(/\u2028/g, '\\u2028') // line separator
|
|
.replace(/\u2029/g, '\\u2029') // paragraph separator
|
|
.replace(/\n/g, '\\n') // line feed
|
|
+ '"';
|
|
}
|
|
|
|
};
|
|
|
|
/* ===== PEG.RegExpUtils ===== */
|
|
|
|
/* RegExp manipulation utility functions. */
|
|
|
|
PEG.RegExpUtils = {
|
|
/*
|
|
* Escapes characters inside the string so that it can be used as a list of
|
|
* characters in a character class of a regular expression.
|
|
*/
|
|
quoteForClass: function(s) {
|
|
/* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
|
|
return s
|
|
.replace(/\\/g, '\\\\') // backslash
|
|
.replace(/\0/g, '\\0') // null, IE needs this
|
|
.replace(/\//g, '\\/') // closing slash
|
|
.replace(/]/g, '\\]') // closing bracket
|
|
.replace(/-/g, '\\-') // dash
|
|
.replace(/\r/g, '\\r') // carriage return
|
|
.replace(/\u2028/g, '\\u2028') // line separator
|
|
.replace(/\u2029/g, '\\u2029') // paragraph separator
|
|
.replace(/\n/g, '\\n') // line feed
|
|
}
|
|
};
|
|
|
|
/* ===== PEG.Compiler ===== */
|
|
|
|
PEG.Compiler = {
|
|
/*
|
|
* Takes parts of code, interpolates variables inside them and joins them with
|
|
* a newline.
|
|
*
|
|
* Variables are delimited with "${" and "}" and their names must be valid
|
|
* identifiers (i.e. they must match [a-zA-Z_][a-zA-Z0-9_]*). Variable values
|
|
* are specified as properties of the last parameter (if this is an object,
|
|
* otherwise empty variable set is assumed). Undefined variables result in
|
|
* throwing |Error|.
|
|
*
|
|
* There can be a filter specified after the variable name, prefixed with "|".
|
|
* The filter name must be a valid identifier. The only recognized filter
|
|
* right now is "string", which quotes the variable value as a JavaScript
|
|
* string. Unrecognized filters result in throwing |Error|.
|
|
*
|
|
* If any part has multiple lines and the first line is indented by some
|
|
* amount of whitespace (as defined by the /\s+/ JavaScript regular
|
|
* expression), second to last lines are indented by the same amount of
|
|
* whitespace. This results in nicely indented multiline code in variables
|
|
* without making the templates look ugly.
|
|
*
|
|
* Examples:
|
|
*
|
|
* PEG.Compiler.formatCode("foo", "bar"); // "foo\nbar"
|
|
* PEG.Compiler.formatCode(
|
|
* "foo", "${bar}",
|
|
* { bar: "baz" }
|
|
* ); // "foo\nbaz"
|
|
* PEG.Compiler.formatCode("foo", "${bar}"); // throws Error
|
|
* PEG.Compiler.formatCode(
|
|
* "foo", "${bar|string}",
|
|
* { bar: "baz" }
|
|
* ); // "foo\n\"baz\""
|
|
* PEG.Compiler.formatCode(
|
|
* "foo", "${bar|eeek}",
|
|
* { bar: "baz" }
|
|
* ); // throws Error
|
|
* PEG.Compiler.formatCode(
|
|
* "foo", "${bar}",
|
|
* { bar: " baz\nqux" }
|
|
* ); // "foo\n baz\n qux"
|
|
*/
|
|
formatCode: function() {
|
|
function interpolateVariablesInParts(parts) {
|
|
return PEG.ArrayUtils.map(parts, function(part) {
|
|
return part.replace(
|
|
/\$\{([a-zA-Z_][a-zA-Z0-9_]*)(\|([a-zA-Z_][a-zA-Z0-9_]*))?\}/g,
|
|
function(match, name, dummy, filter) {
|
|
var value = vars[name];
|
|
if (typeof(value) === "undefined") {
|
|
throw new Error("Undefined variable: \"" + name + "\".");
|
|
}
|
|
|
|
if (typeof(filter) !== "undefined" && filter != "") { // JavaScript engines differ here.
|
|
if (filter === "string") {
|
|
return PEG.StringUtils.quote(value);
|
|
} else {
|
|
throw new Error("Unrecognized filter: \"" + filter + "\".");
|
|
}
|
|
} else {
|
|
return value;
|
|
}
|
|
}
|
|
);
|
|
});
|
|
}
|
|
|
|
function indentMultilineParts(parts) {
|
|
return PEG.ArrayUtils.map(parts, function(part) {
|
|
if (!/\n/.test(part)) { return part; }
|
|
|
|
var firstLineWhitespacePrefix = part.match(/^\s*/)[0];
|
|
var lines = part.split("\n");
|
|
var linesIndented = [lines[0]].concat(
|
|
PEG.ArrayUtils.map(lines.slice(1), function(line) {
|
|
return firstLineWhitespacePrefix + line;
|
|
})
|
|
);
|
|
return linesIndented.join("\n");
|
|
});
|
|
}
|
|
|
|
var args = Array.prototype.slice.call(arguments);
|
|
var vars = args[args.length - 1] instanceof Object ? args.pop() : {};
|
|
|
|
return indentMultilineParts(interpolateVariablesInParts(args)).join("\n");
|
|
},
|
|
|
|
_uniqueIdentifierCounters: {},
|
|
|
|
/*
|
|
* Generates a unique identifier with specified prefix. The sequence of
|
|
* generated identifiers with given prefix is repeatable and will be the same
|
|
* within different language runtimes.
|
|
*/
|
|
generateUniqueIdentifier: function(prefix) {
|
|
this._uniqueIdentifierCounters[prefix]
|
|
= this._uniqueIdentifierCounters[prefix] || 0;
|
|
return prefix + this._uniqueIdentifierCounters[prefix]++;
|
|
},
|
|
|
|
/*
|
|
* Resets internal counters of the unique identifier generator. The sequence
|
|
* of identifiers with given prefix generated by |generateUniqueIdentifier|
|
|
* will start from the beginning.
|
|
*/
|
|
resetUniqueIdentifierCounters: function() {
|
|
this._uniqueIdentifierCounters = {};
|
|
},
|
|
|
|
/*
|
|
* Checks made on the grammar AST before compilation. Each check is a function
|
|
* that is passed the AST and does not return anything. If the check passes,
|
|
* the function does not do anything special, otherwise it throws
|
|
* |PEG.GrammarError|. The checks are run in sequence in order of their
|
|
* definition.
|
|
*/
|
|
_checks: [
|
|
/* Checks that all referenced rules exist. */
|
|
function(ast) {
|
|
function nop() {}
|
|
|
|
function checkExpression(node) { check(node.expression); }
|
|
|
|
function checkSubnodes(propertyName) {
|
|
return function(node) {
|
|
PEG.ArrayUtils.each(node[propertyName], check);
|
|
};
|
|
}
|
|
|
|
var checkFunctions = {
|
|
grammar:
|
|
function(node) {
|
|
for (var name in node.rules) {
|
|
check(node.rules[name]);
|
|
}
|
|
},
|
|
|
|
rule: checkExpression,
|
|
choice: checkSubnodes("alternatives"),
|
|
sequence: checkSubnodes("elements"),
|
|
labeled: checkExpression,
|
|
simple_and: checkExpression,
|
|
simple_not: checkExpression,
|
|
semantic_and: nop,
|
|
semantic_not: nop,
|
|
optional: checkExpression,
|
|
zero_or_more: checkExpression,
|
|
one_or_more: checkExpression,
|
|
action: checkExpression,
|
|
|
|
rule_ref:
|
|
function(node) {
|
|
if (typeof(ast.rules[node.name]) === "undefined") {
|
|
throw new PEG.GrammarError(
|
|
"Referenced rule \"" + node.name + "\" does not exist."
|
|
);
|
|
}
|
|
},
|
|
|
|
literal: nop,
|
|
any: nop,
|
|
"class": nop
|
|
};
|
|
|
|
function check(node) { checkFunctions[node.type](node); }
|
|
|
|
check(ast);
|
|
},
|
|
|
|
/* Checks that no left recursion is present. */
|
|
function(ast) {
|
|
function nop() {}
|
|
|
|
function checkExpression(node, appliedRules) {
|
|
check(node.expression, appliedRules);
|
|
}
|
|
|
|
var checkFunctions = {
|
|
grammar:
|
|
function(node, appliedRules) {
|
|
for (var name in node.rules) {
|
|
check(ast.rules[name], appliedRules);
|
|
}
|
|
},
|
|
|
|
rule:
|
|
function(node, appliedRules) {
|
|
check(node.expression, appliedRules.concat(node.name));
|
|
},
|
|
|
|
choice:
|
|
function(node, appliedRules) {
|
|
PEG.ArrayUtils.each(node.alternatives, function(alternative) {
|
|
check(alternative, appliedRules);
|
|
});
|
|
},
|
|
|
|
sequence:
|
|
function(node, appliedRules) {
|
|
if (node.elements.length > 0) {
|
|
check(node.elements[0], appliedRules);
|
|
}
|
|
},
|
|
|
|
labeled: checkExpression,
|
|
simple_and: checkExpression,
|
|
simple_not: checkExpression,
|
|
semantic_and: nop,
|
|
semantic_not: nop,
|
|
optional: checkExpression,
|
|
zero_or_more: checkExpression,
|
|
one_or_more: checkExpression,
|
|
action: checkExpression,
|
|
|
|
rule_ref:
|
|
function(node, appliedRules) {
|
|
if (PEG.ArrayUtils.contains(appliedRules, node.name)) {
|
|
throw new PEG.GrammarError(
|
|
"Left recursion detected for rule \"" + node.name + "\"."
|
|
);
|
|
}
|
|
check(ast.rules[node.name], appliedRules);
|
|
},
|
|
|
|
literal: nop,
|
|
any: nop,
|
|
"class": nop
|
|
};
|
|
|
|
function check(node, appliedRules) {
|
|
checkFunctions[node.type](node, appliedRules);
|
|
}
|
|
|
|
check(ast, []);
|
|
}
|
|
],
|
|
|
|
/*
|
|
* Optimalization passes made on the grammar AST before compilation. Each pass
|
|
* is a function that is passed the AST and returns a new AST. The AST can be
|
|
* modified in-place by the pass. The passes are run in sequence in order of
|
|
* their definition.
|
|
*/
|
|
_passes: [
|
|
/*
|
|
* Removes proxy rules -- that is, rules that only delegate to other rule.
|
|
*/
|
|
function(ast) {
|
|
function isProxyRule(node) {
|
|
return node.type === "rule" && node.expression.type === "rule_ref";
|
|
}
|
|
|
|
function replaceRuleRefs(ast, from, to) {
|
|
function nop() {}
|
|
|
|
function replaceInExpression(node, from, to) {
|
|
replace(node.expression, from, to);
|
|
}
|
|
|
|
function replaceInSubnodes(propertyName) {
|
|
return function(node, from, to) {
|
|
PEG.ArrayUtils.each(node[propertyName], function(node) {
|
|
replace(node, from, to);
|
|
});
|
|
};
|
|
}
|
|
|
|
var replaceFunctions = {
|
|
grammar:
|
|
function(node, from, to) {
|
|
for (var name in node.rules) {
|
|
replace(ast.rules[name], from, to);
|
|
}
|
|
},
|
|
|
|
rule: replaceInExpression,
|
|
choice: replaceInSubnodes("alternatives"),
|
|
sequence: replaceInSubnodes("elements"),
|
|
labeled: replaceInExpression,
|
|
simple_and: replaceInExpression,
|
|
simple_not: replaceInExpression,
|
|
semantic_and: nop,
|
|
semantic_not: nop,
|
|
optional: replaceInExpression,
|
|
zero_or_more: replaceInExpression,
|
|
one_or_more: replaceInExpression,
|
|
action: replaceInExpression,
|
|
|
|
rule_ref:
|
|
function(node, from, to) {
|
|
if (node.name === from) {
|
|
node.name = to;
|
|
}
|
|
},
|
|
|
|
literal: nop,
|
|
any: nop,
|
|
"class": nop
|
|
};
|
|
|
|
function replace(node, from, to) {
|
|
replaceFunctions[node.type](node, from, to);
|
|
}
|
|
|
|
replace(ast, from, to);
|
|
}
|
|
|
|
for (var name in ast.rules) {
|
|
if (isProxyRule(ast.rules[name])) {
|
|
replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name);
|
|
if (name === ast.startRule) {
|
|
ast.startRule = ast.rules[name].expression.name;
|
|
}
|
|
delete ast.rules[name];
|
|
}
|
|
}
|
|
|
|
return ast;
|
|
}
|
|
],
|
|
|
|
_compileFunctions: {
|
|
grammar: function(node) {
|
|
var initializerCode = node.initializer !== null
|
|
? PEG.Compiler.compileNode(node.initializer)
|
|
: "";
|
|
|
|
var parseFunctionDefinitions = [];
|
|
for (var name in node.rules) {
|
|
parseFunctionDefinitions.push(PEG.Compiler.compileNode(node.rules[name]));
|
|
}
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"(function(){",
|
|
" /* Generated by PEG.js (http://pegjs.majda.cz/). */",
|
|
" ",
|
|
" var result = {",
|
|
" /*",
|
|
" * Parses the input with a generated parser. If the parsing is successfull,",
|
|
" * returns a value explicitly or implicitly specified by the grammar from",
|
|
" * which the parser was generated (see |PEG.buildParser|). If the parsing is",
|
|
" * unsuccessful, throws |PEG.grammarParser.SyntaxError| describing the error.",
|
|
" */",
|
|
" parse: function(input) {",
|
|
" var pos = 0;",
|
|
" var rightmostMatchFailuresPos = 0;",
|
|
" var rightmostMatchFailuresExpected = [];",
|
|
" var cache = {};",
|
|
" ",
|
|
/* This needs to be in sync with PEG.StringUtils.quote. */
|
|
" function quoteString(s) {",
|
|
" /*",
|
|
" * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a",
|
|
" * string literal except for the closing quote character, backslash,",
|
|
" * carriage return, line separator, paragraph separator, and line feed.",
|
|
" * Any character may appear in the form of an escape sequence.",
|
|
" */",
|
|
" return '\"' + s",
|
|
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
|
|
" .replace(/\"/g, '\\\\\"') // closing quote character",
|
|
" .replace(/\\r/g, '\\\\r') // carriage return",
|
|
" .replace(/\\u2028/g, '\\\\u2028') // line separator",
|
|
" .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
|
|
" .replace(/\\n/g, '\\\\n') // line feed",
|
|
" + '\"';",
|
|
" }",
|
|
" ",
|
|
/* This needs to be in sync with PEG.ArrayUtils.contains. */
|
|
" function arrayContains(array, value) {",
|
|
" /*",
|
|
" * Stupid IE does not have Array.prototype.indexOf, otherwise this",
|
|
" * function would be a one-liner.",
|
|
" */",
|
|
" var length = array.length;",
|
|
" for (var i = 0; i < length; i++) {",
|
|
" if (array[i] === value) {",
|
|
" return true;",
|
|
" }",
|
|
" }",
|
|
" return false;",
|
|
" }",
|
|
" ",
|
|
" function matchFailed(failure) {",
|
|
" if (pos < rightmostMatchFailuresPos) {",
|
|
" return;",
|
|
" }",
|
|
" ",
|
|
" if (pos > rightmostMatchFailuresPos) {",
|
|
" rightmostMatchFailuresPos = pos;",
|
|
" rightmostMatchFailuresExpected = [];",
|
|
" }",
|
|
" ",
|
|
" if (!arrayContains(rightmostMatchFailuresExpected, failure)) {",
|
|
" rightmostMatchFailuresExpected.push(failure);",
|
|
" }",
|
|
" }",
|
|
" ",
|
|
" ${parseFunctionDefinitions}",
|
|
" ",
|
|
" function buildErrorMessage() {",
|
|
" function buildExpected(failuresExpected) {",
|
|
" switch (failuresExpected.length) {",
|
|
" case 0:",
|
|
" return 'end of input';",
|
|
" case 1:",
|
|
" return failuresExpected[0];",
|
|
" default:",
|
|
" failuresExpected.sort();",
|
|
" return failuresExpected.slice(0, failuresExpected.length - 1).join(', ')",
|
|
" + ' or '",
|
|
" + failuresExpected[failuresExpected.length - 1];",
|
|
" }",
|
|
" }",
|
|
" ",
|
|
" var expected = buildExpected(rightmostMatchFailuresExpected);",
|
|
" var actualPos = Math.max(pos, rightmostMatchFailuresPos);",
|
|
" var actual = actualPos < input.length",
|
|
" ? quoteString(input.charAt(actualPos))",
|
|
" : 'end of input';",
|
|
" ",
|
|
" return 'Expected ' + expected + ' but ' + actual + ' found.';",
|
|
" }",
|
|
" ",
|
|
" function computeErrorPosition() {",
|
|
" /*",
|
|
" * The first idea was to use |String.split| to break the input up to the",
|
|
" * error position along newlines and derive the line and column from",
|
|
" * there. However IE's |split| implementation is so broken that it was",
|
|
" * enough to prevent it.",
|
|
" */",
|
|
" ",
|
|
" var line = 1;",
|
|
" var column = 1;",
|
|
" var seenCR = false;",
|
|
" ",
|
|
" for (var i = 0; i < rightmostMatchFailuresPos; i++) {",
|
|
" var ch = input.charAt(i);",
|
|
" if (ch === '\\n') {",
|
|
" if (!seenCR) { line++; }",
|
|
" column = 1;",
|
|
" seenCR = false;",
|
|
" } else if (ch === '\\r' | ch === '\\u2028' || ch === '\\u2029') {",
|
|
" line++;",
|
|
" column = 1;",
|
|
" seenCR = true;",
|
|
" } else {",
|
|
" column++;",
|
|
" seenCR = false;",
|
|
" }",
|
|
" }",
|
|
" ",
|
|
" return { line: line, column: column };",
|
|
" }",
|
|
" ",
|
|
" ${initializerCode}",
|
|
" ",
|
|
" var result = parse_${startRule}({ reportMatchFailures: true });",
|
|
" ",
|
|
" /*",
|
|
" * The parser is now in one of the following three states:",
|
|
" *",
|
|
" * 1. The parser successfully parsed the whole input.",
|
|
" *",
|
|
" * - |result !== null|",
|
|
" * - |pos === input.length|",
|
|
" * - |rightmostMatchFailuresExpected| may or may not contain something",
|
|
" *",
|
|
" * 2. The parser successfully parsed only a part of the input.",
|
|
" *",
|
|
" * - |result !== null|",
|
|
" * - |pos < input.length|",
|
|
" * - |rightmostMatchFailuresExpected| may or may not contain something",
|
|
" *",
|
|
" * 3. The parser did not successfully parse any part of the input.",
|
|
" *",
|
|
" * - |result === null|",
|
|
" * - |pos === 0|",
|
|
" * - |rightmostMatchFailuresExpected| contains at least one failure",
|
|
" *",
|
|
" * All code following this comment (including called functions) must",
|
|
" * handle these states.",
|
|
" */",
|
|
" if (result === null || pos !== input.length) {",
|
|
" var errorPosition = computeErrorPosition();",
|
|
" throw new this.SyntaxError(",
|
|
" buildErrorMessage(),",
|
|
" errorPosition.line,",
|
|
" errorPosition.column",
|
|
" );",
|
|
" }",
|
|
" ",
|
|
" return result;",
|
|
" },",
|
|
" ",
|
|
" /* Returns the parser source code. */",
|
|
" toSource: function() { return this._source; }",
|
|
" };",
|
|
" ",
|
|
" /* Thrown when a parser encounters a syntax error. */",
|
|
" ",
|
|
" result.SyntaxError = function(message, line, column) {",
|
|
" this.name = 'SyntaxError';",
|
|
" this.message = message;",
|
|
" this.line = line;",
|
|
" this.column = column;",
|
|
" };",
|
|
" ",
|
|
" result.SyntaxError.prototype = Error.prototype;",
|
|
" ",
|
|
" return result;",
|
|
"})()",
|
|
{
|
|
initializerCode: initializerCode,
|
|
parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
|
|
startRule: node.startRule
|
|
}
|
|
);
|
|
},
|
|
|
|
initializer: function(node) {
|
|
return node.code;
|
|
},
|
|
|
|
rule: function(node) {
|
|
/*
|
|
* We want to reset variable names at the beginning of every function so
|
|
* that a little change in the source grammar does not change variables in
|
|
* all the generated code. This is desired especially when one has the
|
|
* generated grammar stored in a VCS (this is true e.g. for our
|
|
* metagrammar).
|
|
*/
|
|
PEG.Compiler.resetUniqueIdentifierCounters();
|
|
|
|
var resultVar = PEG.Compiler.generateUniqueIdentifier("result");
|
|
|
|
if (node.displayName !== null) {
|
|
var setReportMatchFailuresCode = PEG.Compiler.formatCode(
|
|
"var savedReportMatchFailures = context.reportMatchFailures;",
|
|
"context.reportMatchFailures = false;"
|
|
);
|
|
var restoreReportMatchFailuresCode = PEG.Compiler.formatCode(
|
|
"context.reportMatchFailures = savedReportMatchFailures;"
|
|
);
|
|
var reportMatchFailureCode = PEG.Compiler.formatCode(
|
|
"if (context.reportMatchFailures && ${resultVar} === null) {",
|
|
" matchFailed(${displayName|string});",
|
|
"}",
|
|
{
|
|
displayName: node.displayName,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
} else {
|
|
var setReportMatchFailuresCode = "";
|
|
var restoreReportMatchFailuresCode = "";
|
|
var reportMatchFailureCode = "";
|
|
}
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"function parse_${name}(context) {",
|
|
" var cacheKey = ${name|string} + '@' + pos;",
|
|
" var cachedResult = cache[cacheKey];",
|
|
" if (cachedResult) {",
|
|
" pos = cachedResult.nextPos;",
|
|
" return cachedResult.result;",
|
|
" }",
|
|
" ",
|
|
" ${setReportMatchFailuresCode}",
|
|
" ${code}",
|
|
" ${restoreReportMatchFailuresCode}",
|
|
" ${reportMatchFailureCode}",
|
|
" ",
|
|
" cache[cacheKey] = {",
|
|
" nextPos: pos,",
|
|
" result: ${resultVar}",
|
|
" };",
|
|
" return ${resultVar};",
|
|
"}",
|
|
{
|
|
name: node.name,
|
|
setReportMatchFailuresCode: setReportMatchFailuresCode,
|
|
restoreReportMatchFailuresCode: restoreReportMatchFailuresCode,
|
|
reportMatchFailureCode: reportMatchFailureCode,
|
|
code: PEG.Compiler.compileNode(node.expression, resultVar),
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
/*
|
|
* The contract for all code fragments generated by the following functions
|
|
* is as follows:
|
|
*
|
|
* * The code fragment should try to match a part of the input starting with
|
|
* the position indicated in |pos|. That position may point past the end of
|
|
* the input.
|
|
*
|
|
* * If the code fragment matches the input, it advances |pos| after the
|
|
* matched part of the input and sets variable with a name stored in
|
|
* |resultVar| to appropriate value, which is always non-null.
|
|
*
|
|
* * If the code fragment does not match the input, it does not change |pos|
|
|
* and it sets a variable with a name stored in |resultVar| to |null|.
|
|
*/
|
|
|
|
choice: function(node, resultVar) {
|
|
var code = PEG.Compiler.formatCode(
|
|
"var ${resultVar} = null;",
|
|
{ resultVar: resultVar }
|
|
);
|
|
|
|
for (var i = node.alternatives.length - 1; i >= 0; i--) {
|
|
var alternativeResultVar = PEG.Compiler.generateUniqueIdentifier("result");
|
|
code = PEG.Compiler.formatCode(
|
|
"${alternativeCode}",
|
|
"if (${alternativeResultVar} !== null) {",
|
|
" var ${resultVar} = ${alternativeResultVar};",
|
|
"} else {",
|
|
" ${code};",
|
|
"}",
|
|
{
|
|
alternativeCode: PEG.Compiler.compileNode(node.alternatives[i], alternativeResultVar),
|
|
alternativeResultVar: alternativeResultVar,
|
|
code: code,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
}
|
|
|
|
return code;
|
|
},
|
|
|
|
sequence: function(node, resultVar) {
|
|
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
|
|
|
|
var elementResultVars = PEG.ArrayUtils.map(node.elements, function() {
|
|
return PEG.Compiler.generateUniqueIdentifier("result")
|
|
});
|
|
|
|
var code = PEG.Compiler.formatCode(
|
|
"var ${resultVar} = ${elementResultVarArray};",
|
|
{
|
|
resultVar: resultVar,
|
|
elementResultVarArray: "[" + elementResultVars.join(", ") + "]"
|
|
}
|
|
);
|
|
|
|
for (var i = node.elements.length - 1; i >= 0; i--) {
|
|
code = PEG.Compiler.formatCode(
|
|
"${elementCode}",
|
|
"if (${elementResultVar} !== null) {",
|
|
" ${code}",
|
|
"} else {",
|
|
" var ${resultVar} = null;",
|
|
" pos = ${savedPosVar};",
|
|
"}",
|
|
{
|
|
elementCode: PEG.Compiler.compileNode(node.elements[i], elementResultVars[i]),
|
|
elementResultVar: elementResultVars[i],
|
|
code: code,
|
|
savedPosVar: savedPosVar,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
}
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"var ${savedPosVar} = pos;",
|
|
"${code}",
|
|
{
|
|
code: code,
|
|
savedPosVar: savedPosVar
|
|
}
|
|
);
|
|
},
|
|
|
|
labeled: function(node, resultVar) {
|
|
return PEG.Compiler.compileNode(node.expression, resultVar);
|
|
},
|
|
|
|
simple_and: function(node, resultVar) {
|
|
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
|
|
var savedReportMatchFailuresVar = PEG.Compiler.generateUniqueIdentifier("savedReportMatchFailuresVar");
|
|
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"var ${savedPosVar} = pos;",
|
|
"var ${savedReportMatchFailuresVar} = context.reportMatchFailures;",
|
|
"context.reportMatchFailures = false;",
|
|
"${expressionCode}",
|
|
"context.reportMatchFailures = ${savedReportMatchFailuresVar};",
|
|
"if (${expressionResultVar} !== null) {",
|
|
" var ${resultVar} = '';",
|
|
" pos = ${savedPosVar};",
|
|
"} else {",
|
|
" var ${resultVar} = null;",
|
|
"}",
|
|
{
|
|
expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar),
|
|
expressionResultVar: expressionResultVar,
|
|
savedPosVar: savedPosVar,
|
|
savedReportMatchFailuresVar: savedReportMatchFailuresVar,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
simple_not: function(node, resultVar) {
|
|
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
|
|
var savedReportMatchFailuresVar = PEG.Compiler.generateUniqueIdentifier("savedReportMatchFailuresVar");
|
|
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"var ${savedPosVar} = pos;",
|
|
"var ${savedReportMatchFailuresVar} = context.reportMatchFailures;",
|
|
"context.reportMatchFailures = false;",
|
|
"${expressionCode}",
|
|
"context.reportMatchFailures = ${savedReportMatchFailuresVar};",
|
|
"if (${expressionResultVar} === null) {",
|
|
" var ${resultVar} = '';",
|
|
"} else {",
|
|
" var ${resultVar} = null;",
|
|
" pos = ${savedPosVar};",
|
|
"}",
|
|
{
|
|
expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar),
|
|
expressionResultVar: expressionResultVar,
|
|
savedPosVar: savedPosVar,
|
|
savedReportMatchFailuresVar: savedReportMatchFailuresVar,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
semantic_and: function(node, resultVar) {
|
|
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"var ${resultVar} = (function() {${actionCode}})() ? '' : null;",
|
|
{
|
|
actionCode: node.code,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
semantic_not: function(node, resultVar) {
|
|
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"var ${resultVar} = (function() {${actionCode}})() ? null : '';",
|
|
{
|
|
actionCode: node.code,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
optional: function(node, resultVar) {
|
|
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"${expressionCode}",
|
|
"var ${resultVar} = ${expressionResultVar} !== null ? ${expressionResultVar} : '';",
|
|
{
|
|
expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar),
|
|
expressionResultVar: expressionResultVar,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
zero_or_more: function(node, resultVar) {
|
|
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"var ${resultVar} = [];",
|
|
"${expressionCode}",
|
|
"while (${expressionResultVar} !== null) {",
|
|
" ${resultVar}.push(${expressionResultVar});",
|
|
" ${expressionCode}",
|
|
"}",
|
|
{
|
|
expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar),
|
|
expressionResultVar: expressionResultVar,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
one_or_more: function(node, resultVar) {
|
|
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"${expressionCode}",
|
|
"if (${expressionResultVar} !== null) {",
|
|
" var ${resultVar} = [];",
|
|
" while (${expressionResultVar} !== null) {",
|
|
" ${resultVar}.push(${expressionResultVar});",
|
|
" ${expressionCode}",
|
|
" }",
|
|
"} else {",
|
|
" var ${resultVar} = null;",
|
|
"}",
|
|
{
|
|
expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar),
|
|
expressionResultVar: expressionResultVar,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
action: function(node, resultVar) {
|
|
/*
|
|
* In case of sequences, we splat their elements into function arguments
|
|
* one by one. Example:
|
|
*
|
|
* start: a:"a" b:"b" c:"c" { alert(arguments.length) } // => 3
|
|
*
|
|
* This behavior is reflected in this function.
|
|
*/
|
|
|
|
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
|
|
|
|
if (node.expression.type === "sequence") {
|
|
var formalParams = [];
|
|
var actualParams = [];
|
|
|
|
var elements = node.expression.elements;
|
|
var elementsLength = elements.length;
|
|
for (var i = 0; i < elementsLength; i++) {
|
|
if (elements[i].type === "labeled") {
|
|
formalParams.push(elements[i].label);
|
|
actualParams.push(expressionResultVar + "[" + i + "]");
|
|
}
|
|
}
|
|
} else if (node.expression.type === "labeled") {
|
|
var formalParams = [node.expression.label];
|
|
var actualParams = [expressionResultVar];
|
|
} else {
|
|
var formalParams = [];
|
|
var actualParams = [];
|
|
}
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"${expressionCode}",
|
|
"var ${resultVar} = ${expressionResultVar} !== null",
|
|
" ? (function(${formalParams}) {${actionCode}})(${actualParams})",
|
|
" : null;",
|
|
{
|
|
expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar),
|
|
expressionResultVar: expressionResultVar,
|
|
actionCode: node.code,
|
|
formalParams: formalParams.join(", "),
|
|
actualParams: actualParams.join(", "),
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
rule_ref: function(node, resultVar) {
|
|
return PEG.Compiler.formatCode(
|
|
"var ${resultVar} = ${ruleMethod}(context);",
|
|
{
|
|
ruleMethod: "parse_" + node.name,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
literal: function(node, resultVar) {
|
|
return PEG.Compiler.formatCode(
|
|
"if (input.substr(pos, ${length}) === ${value|string}) {",
|
|
" var ${resultVar} = ${value|string};",
|
|
" pos += ${length};",
|
|
"} else {",
|
|
" var ${resultVar} = null;",
|
|
" if (context.reportMatchFailures) {",
|
|
" matchFailed(quoteString(${value|string}));",
|
|
" }",
|
|
"}",
|
|
{
|
|
value: node.value,
|
|
length: node.value.length,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
},
|
|
|
|
any: function(node, resultVar) {
|
|
return PEG.Compiler.formatCode(
|
|
"if (input.length > pos) {",
|
|
" var ${resultVar} = input.charAt(pos);",
|
|
" pos++;",
|
|
"} else {",
|
|
" var ${resultVar} = null;",
|
|
" if (context.reportMatchFailures) {",
|
|
" matchFailed('any character');",
|
|
" }",
|
|
"}",
|
|
{ resultVar: resultVar }
|
|
);
|
|
},
|
|
|
|
"class": function(node, resultVar) {
|
|
if (node.parts.length > 0) {
|
|
var regexp = "/^["
|
|
+ (node.inverted ? "^" : "")
|
|
+ PEG.ArrayUtils.map(node.parts, function(part) {
|
|
return part instanceof Array
|
|
? PEG.RegExpUtils.quoteForClass(part[0])
|
|
+ "-"
|
|
+ PEG.RegExpUtils.quoteForClass(part[1])
|
|
: PEG.RegExpUtils.quoteForClass(part);
|
|
}).join("")
|
|
+ "]/";
|
|
} else {
|
|
/*
|
|
* Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so
|
|
* we translate them into euqivalents it can handle.
|
|
*/
|
|
var regexp = node.inverted ? "/^[\\S\\s]/" : "/^(?!)/";
|
|
}
|
|
|
|
return PEG.Compiler.formatCode(
|
|
"if (input.substr(pos).match(${regexp}) !== null) {",
|
|
" var ${resultVar} = input.charAt(pos);",
|
|
" pos++;",
|
|
"} else {",
|
|
" var ${resultVar} = null;",
|
|
" if (context.reportMatchFailures) {",
|
|
" matchFailed(${rawText|string});",
|
|
" }",
|
|
"}",
|
|
{
|
|
regexp: regexp,
|
|
rawText: node.rawText,
|
|
resultVar: resultVar
|
|
}
|
|
);
|
|
}
|
|
},
|
|
|
|
/*
|
|
* Compiles an AST node and returns the generated code. The |resultVar|
|
|
* parameter contains a name of variable in which the match result will be
|
|
* stored in the generated code.
|
|
*/
|
|
compileNode: function(node, resultVar) {
|
|
return this._compileFunctions[node.type](node, resultVar);
|
|
},
|
|
|
|
/*
|
|
* Generates a parser from a specified grammar AST. Throws |PEG.GrammarError|
|
|
* if the AST contains a semantic error. Note that not all errors are detected
|
|
* during the generation and some may protrude to the generated parser and
|
|
* cause its malfunction.
|
|
*/
|
|
compileParser: function(ast) {
|
|
for (var i = 0; i < this._checks.length; i++) {
|
|
this._checks[i](ast);
|
|
}
|
|
|
|
for (var i = 0; i < this._passes.length; i++) {
|
|
ast = this._passes[i](ast);
|
|
}
|
|
|
|
var source = this.compileNode(ast);
|
|
var result = eval(source);
|
|
result._source = source;
|
|
|
|
return result;
|
|
}
|
|
};
|
|
|
|
})();
|