Treat the whole grammar as an AST node

redux
David Majda 15 years ago
parent 917cf1cf2a
commit 4895f4f8e4

@ -275,6 +275,13 @@ PEG.Compiler = {
}
var checkFunctions = {
grammar:
function(node) {
for (var name in node.rules) {
check(node.rules[name]);
}
},
rule: checkExpression,
choice: checkSubnodes("alternatives"),
sequence: checkSubnodes("elements"),
@ -302,9 +309,7 @@ PEG.Compiler = {
/*
 * Dispatches |node| to the entry of |checkFunctions| registered under the
 * node's |type|. The handler is invoked with |checkFunctions| as |this|.
 */
function check(node) {
  var handler = checkFunctions[node.type];
  handler.call(checkFunctions, node);
}
for (var rule in ast.rules) {
check(ast.rules[rule]);
}
check(ast);
},
/* Checks that no left recursion is present. */
@ -316,6 +321,13 @@ PEG.Compiler = {
}
var checkFunctions = {
grammar:
function(node, appliedRules) {
for (var name in node.rules) {
check(ast.rules[name], appliedRules);
}
},
rule:
function(node, appliedRules) {
check(node.expression, appliedRules.concat(node.name));
@ -362,9 +374,7 @@ PEG.Compiler = {
checkFunctions[node.type](node, appliedRules);
}
for (var rule in ast.rules) {
check(ast.rules[rule], []);
}
check(ast, []);
}
],
@ -399,6 +409,13 @@ PEG.Compiler = {
}
var replaceFunctions = {
grammar:
function(node, from, to) {
for (var name in node.rules) {
replace(ast.rules[name], from, to);
}
},
rule: replaceInExpression,
choice: replaceInSubnodes("alternatives"),
sequence: replaceInSubnodes("elements"),
@ -426,18 +443,16 @@ PEG.Compiler = {
replaceFunctions[node.type](node, from, to);
}
for (var rule in ast.rules) {
replace(ast.rules[rule], from, to);
}
replace(ast, from, to);
}
for (var rule in ast.rules) {
if (isProxyRule(ast.rules[rule])) {
replaceRuleRefs(ast, ast.rules[rule].name, ast.rules[rule].expression.name);
if (rule === ast.startRule) {
ast.startRule = ast.rules[rule].expression.name;
for (var name in ast.rules) {
if (isProxyRule(ast.rules[name])) {
replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name);
if (name === ast.startRule) {
ast.startRule = ast.rules[name].expression.name;
}
delete ast.rules[rule];
delete ast.rules[name];
}
}
@ -446,6 +461,203 @@ PEG.Compiler = {
],
_compileFunctions: {
grammar: function(node) {
var initializerCode = node.initializer !== null
? PEG.Compiler.compileNode(node.initializer)
: "";
var parseFunctionDefinitions = [];
for (var name in node.rules) {
parseFunctionDefinitions.push(PEG.Compiler.compileNode(node.rules[name]));
}
return PEG.Compiler.formatCode(
"(function(){",
" /* Generated by PEG.js (http://pegjs.majda.cz/). */",
" ",
" var result = {",
" /*",
" * Parses the input with a generated parser. If the parsing is successfull,",
" * returns a value explicitly or implicitly specified by the grammar from",
" * which the parser was generated (see |PEG.buildParser|). If the parsing is",
" * unsuccessful, throws |PEG.grammarParser.SyntaxError| describing the error.",
" */",
" parse: function(input) {",
" var pos = 0;",
" var rightmostMatchFailuresPos = 0;",
" var rightmostMatchFailuresExpected = [];",
" var cache = {};",
" ",
/* This needs to be in sync with PEG.StringUtils.quote. */
" function quoteString(s) {",
" /*",
" * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a",
" * string literal except for the closing quote character, backslash,",
" * carriage return, line separator, paragraph separator, and line feed.",
" * Any character may appear in the form of an escape sequence.",
" */",
" return '\"' + s",
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
" .replace(/\"/g, '\\\\\"') // closing quote character",
" .replace(/\\r/g, '\\\\r') // carriage return",
" .replace(/\\u2028/g, '\\\\u2028') // line separator",
" .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
" .replace(/\\n/g, '\\\\n') // line feed",
" + '\"';",
" }",
" ",
/* This needs to be in sync with PEG.ArrayUtils.contains. */
" function arrayContains(array, value) {",
" /*",
" * Stupid IE does not have Array.prototype.indexOf, otherwise this",
" * function would be a one-liner.",
" */",
" var length = array.length;",
" for (var i = 0; i < length; i++) {",
" if (array[i] === value) {",
" return true;",
" }",
" }",
" return false;",
" }",
" ",
" function matchFailed(failure) {",
" if (pos < rightmostMatchFailuresPos) {",
" return;",
" }",
" ",
" if (pos > rightmostMatchFailuresPos) {",
" rightmostMatchFailuresPos = pos;",
" rightmostMatchFailuresExpected = [];",
" }",
" ",
" if (!arrayContains(rightmostMatchFailuresExpected, failure)) {",
" rightmostMatchFailuresExpected.push(failure);",
" }",
" }",
" ",
" ${parseFunctionDefinitions}",
" ",
" function buildErrorMessage() {",
" function buildExpected(failuresExpected) {",
" switch (failuresExpected.length) {",
" case 0:",
" return 'end of input';",
" case 1:",
" return failuresExpected[0];",
" default:",
" failuresExpected.sort();",
" return failuresExpected.slice(0, failuresExpected.length - 1).join(', ')",
" + ' or '",
" + failuresExpected[failuresExpected.length - 1];",
" }",
" }",
" ",
" var expected = buildExpected(rightmostMatchFailuresExpected);",
" var actualPos = Math.max(pos, rightmostMatchFailuresPos);",
" var actual = actualPos < input.length",
" ? quoteString(input.charAt(actualPos))",
" : 'end of input';",
" ",
" return 'Expected ' + expected + ' but ' + actual + ' found.';",
" }",
" ",
" function computeErrorPosition() {",
" /*",
" * The first idea was to use |String.split| to break the input up to the",
" * error position along newlines and derive the line and column from",
" * there. However IE's |split| implementation is so broken that it was",
" * enough to prevent it.",
" */",
" ",
" var line = 1;",
" var column = 1;",
" var seenCR = false;",
" ",
" for (var i = 0; i < rightmostMatchFailuresPos; i++) {",
" var ch = input.charAt(i);",
" if (ch === '\\n') {",
" if (!seenCR) { line++; }",
" column = 1;",
" seenCR = false;",
" } else if (ch === '\\r' | ch === '\\u2028' || ch === '\\u2029') {",
" line++;",
" column = 1;",
" seenCR = true;",
" } else {",
" column++;",
" seenCR = false;",
" }",
" }",
" ",
" return { line: line, column: column };",
" }",
" ",
" ${initializerCode}",
" ",
" var result = parse_${startRule}({ reportMatchFailures: true });",
" ",
" /*",
" * The parser is now in one of the following three states:",
" *",
" * 1. The parser successfully parsed the whole input.",
" *",
" * - |result !== null|",
" * - |pos === input.length|",
" * - |rightmostMatchFailuresExpected| may or may not contain something",
" *",
" * 2. The parser successfully parsed only a part of the input.",
" *",
" * - |result !== null|",
" * - |pos < input.length|",
" * - |rightmostMatchFailuresExpected| may or may not contain something",
" *",
" * 3. The parser did not successfully parse any part of the input.",
" *",
" * - |result === null|",
" * - |pos === 0|",
" * - |rightmostMatchFailuresExpected| contains at least one failure",
" *",
" * All code following this comment (including called functions) must",
" * handle these states.",
" */",
" if (result === null || pos !== input.length) {",
" var errorPosition = computeErrorPosition();",
" throw new this.SyntaxError(",
" buildErrorMessage(),",
" errorPosition.line,",
" errorPosition.column",
" );",
" }",
" ",
" return result;",
" },",
" ",
" /* Returns the parser source code. */",
" toSource: function() { return this._source; }",
" };",
" ",
" /* Thrown when a parser encounters a syntax error. */",
" ",
" result.SyntaxError = function(message, line, column) {",
" this.name = 'SyntaxError';",
" this.message = message;",
" this.line = line;",
" this.column = column;",
" };",
" ",
" result.SyntaxError.prototype = Error.prototype;",
" ",
" return result;",
"})()",
{
initializerCode: initializerCode,
parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
startRule: node.startRule
}
);
},
initializer: function(node) {
return node.code;
},
@ -870,203 +1082,10 @@ PEG.Compiler = {
ast = this._passes[i](ast);
}
var initializerCode = ast.initializer !== null
? this.compileNode(ast.initializer)
: "";
var parseFunctionDefinitions = [];
for (var rule in ast.rules) {
parseFunctionDefinitions.push(this.compileNode(ast.rules[rule]));
}
var source = this.formatCode(
"(function(){",
" /* Generated by PEG.js (http://pegjs.majda.cz/). */",
" ",
" var result = {",
" /*",
" * Parses the input with a generated parser. If the parsing is successfull,",
" * returns a value explicitly or implicitly specified by the grammar from",
" * which the parser was generated (see |PEG.buildParser|). If the parsing is",
" * unsuccessful, throws |PEG.grammarParser.SyntaxError| describing the error.",
" */",
" parse: function(input) {",
" var pos = 0;",
" var rightmostMatchFailuresPos = 0;",
" var rightmostMatchFailuresExpected = [];",
" var cache = {};",
" ",
/* This needs to be in sync with PEG.StringUtils.quote. */
" function quoteString(s) {",
" /*",
" * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a",
" * string literal except for the closing quote character, backslash,",
" * carriage return, line separator, paragraph separator, and line feed.",
" * Any character may appear in the form of an escape sequence.",
" */",
" return '\"' + s",
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
" .replace(/\"/g, '\\\\\"') // closing quote character",
" .replace(/\\r/g, '\\\\r') // carriage return",
" .replace(/\\u2028/g, '\\\\u2028') // line separator",
" .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
" .replace(/\\n/g, '\\\\n') // line feed",
" + '\"';",
" }",
" ",
/* This needs to be in sync with PEG.ArrayUtils.contains. */
" function arrayContains(array, value) {",
" /*",
" * Stupid IE does not have Array.prototype.indexOf, otherwise this",
" * function would be a one-liner.",
" */",
" var length = array.length;",
" for (var i = 0; i < length; i++) {",
" if (array[i] === value) {",
" return true;",
" }",
" }",
" return false;",
" }",
" ",
" function matchFailed(failure) {",
" if (pos < rightmostMatchFailuresPos) {",
" return;",
" }",
" ",
" if (pos > rightmostMatchFailuresPos) {",
" rightmostMatchFailuresPos = pos;",
" rightmostMatchFailuresExpected = [];",
" }",
" ",
" if (!arrayContains(rightmostMatchFailuresExpected, failure)) {",
" rightmostMatchFailuresExpected.push(failure);",
" }",
" }",
" ",
" ${parseFunctionDefinitions}",
" ",
" function buildErrorMessage() {",
" function buildExpected(failuresExpected) {",
" switch (failuresExpected.length) {",
" case 0:",
" return 'end of input';",
" case 1:",
" return failuresExpected[0];",
" default:",
" failuresExpected.sort();",
" return failuresExpected.slice(0, failuresExpected.length - 1).join(', ')",
" + ' or '",
" + failuresExpected[failuresExpected.length - 1];",
" }",
" }",
" ",
" var expected = buildExpected(rightmostMatchFailuresExpected);",
" var actualPos = Math.max(pos, rightmostMatchFailuresPos);",
" var actual = actualPos < input.length",
" ? quoteString(input.charAt(actualPos))",
" : 'end of input';",
" ",
" return 'Expected ' + expected + ' but ' + actual + ' found.';",
" }",
" ",
" function computeErrorPosition() {",
" /*",
" * The first idea was to use |String.split| to break the input up to the",
" * error position along newlines and derive the line and column from",
" * there. However IE's |split| implementation is so broken that it was",
" * enough to prevent it.",
" */",
" ",
" var line = 1;",
" var column = 1;",
" var seenCR = false;",
" ",
" for (var i = 0; i < rightmostMatchFailuresPos; i++) {",
" var ch = input.charAt(i);",
" if (ch === '\\n') {",
" if (!seenCR) { line++; }",
" column = 1;",
" seenCR = false;",
" } else if (ch === '\\r' | ch === '\\u2028' || ch === '\\u2029') {",
" line++;",
" column = 1;",
" seenCR = true;",
" } else {",
" column++;",
" seenCR = false;",
" }",
" }",
" ",
" return { line: line, column: column };",
" }",
" ",
" ${initializerCode}",
" ",
" var result = parse_${startRule}({ reportMatchFailures: true });",
" ",
" /*",
" * The parser is now in one of the following three states:",
" *",
" * 1. The parser successfully parsed the whole input.",
" *",
" * - |result !== null|",
" * - |pos === input.length|",
" * - |rightmostMatchFailuresExpected| may or may not contain something",
" *",
" * 2. The parser successfully parsed only a part of the input.",
" *",
" * - |result !== null|",
" * - |pos < input.length|",
" * - |rightmostMatchFailuresExpected| may or may not contain something",
" *",
" * 3. The parser did not successfully parse any part of the input.",
" *",
" * - |result === null|",
" * - |pos === 0|",
" * - |rightmostMatchFailuresExpected| contains at least one failure",
" *",
" * All code following this comment (including called functions) must",
" * handle these states.",
" */",
" if (result === null || pos !== input.length) {",
" var errorPosition = computeErrorPosition();",
" throw new this.SyntaxError(",
" buildErrorMessage(),",
" errorPosition.line,",
" errorPosition.column",
" );",
" }",
" ",
" return result;",
" },",
" ",
" /* Returns the parser source code. */",
" toSource: function() { return this._source; }",
" };",
" ",
" /* Thrown when a parser encounters a syntax error. */",
" ",
" result.SyntaxError = function(message, line, column) {",
" this.name = 'SyntaxError';",
" this.message = message;",
" this.line = line;",
" this.column = column;",
" };",
" ",
" result.SyntaxError.prototype = Error.prototype;",
" ",
" return result;",
"})()",
{
initializerCode: initializerCode,
parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
startRule: ast.startRule
}
);
var source = this.compileNode(ast);
var result = eval(source);
result._source = source;
return result;
}
};

@ -105,6 +105,7 @@ PEG.grammarParser = (function(){
PEG.ArrayUtils.each(rules, function(rule) { rulesConverted[rule.name] = rule; });
return {
type: "grammar",
initializer: initializer !== "" ? initializer : null,
rules: rulesConverted,
startRule: rules[0].name

@ -4,6 +4,7 @@ grammar
PEG.ArrayUtils.each(rules, function(rule) { rulesConverted[rule.name] = rule; });
return {
type: "grammar",
initializer: initializer !== "" ? initializer : null,
rules: rulesConverted,
startRule: rules[0].name

@ -127,6 +127,7 @@ var choiceLiterals = choice([literalAbcd, literalEfgh, literalIjkl]);
function oneRuleGrammar(expression) {
return {
type: "grammar",
initializer: null,
rules: { start: rule("start", null, expression) },
startRule: "start"
@ -156,6 +157,7 @@ function actionGrammar(action) {
}
var initializerGrammar = {
type: "grammar",
initializer: initializer(" code "),
rules: {
a: rule("a", null, literalAbcd),
@ -168,6 +170,7 @@ test("parses grammar", function() {
grammarParserParses(
'a = "abcd"',
{
type: "grammar",
initializer: null,
rules: { a: rule("a", null, literalAbcd) },
startRule: "a"
@ -177,6 +180,7 @@ test("parses grammar", function() {
grammarParserParses(
'a = "abcd"; b = "efgh"; c = "ijkl"',
{
type: "grammar",
initializer: null,
rules: {
a: rule("a", null, literalAbcd),
@ -203,6 +207,7 @@ test("parses rule", function() {
grammarParserParses(
'start "start rule" = "abcd" / "efgh" / "ijkl"',
{
type: "grammar",
initializer: null,
rules: { start: rule("start", "start rule", choiceLiterals) },
startRule: "start"

Loading…
Cancel
Save