diff --git a/lib/compiler.js b/lib/compiler.js index 0557086..19c9120 100644 --- a/lib/compiler.js +++ b/lib/compiler.js @@ -275,6 +275,13 @@ PEG.Compiler = { } var checkFunctions = { + grammar: + function(node) { + for (var name in node.rules) { + check(node.rules[name]); + } + }, + rule: checkExpression, choice: checkSubnodes("alternatives"), sequence: checkSubnodes("elements"), @@ -302,9 +309,7 @@ PEG.Compiler = { function check(node) { checkFunctions[node.type](node); } - for (var rule in ast.rules) { - check(ast.rules[rule]); - } + check(ast); }, /* Checks that no left recursion is present. */ @@ -316,6 +321,13 @@ PEG.Compiler = { } var checkFunctions = { + grammar: + function(node, appliedRules) { + for (var name in node.rules) { + check(node.rules[name], appliedRules); + } + }, + rule: function(node, appliedRules) { check(node.expression, appliedRules.concat(node.name)); @@ -362,9 +374,7 @@ PEG.Compiler = { checkFunctions[node.type](node, appliedRules); } - for (var rule in ast.rules) { - check(ast.rules[rule], []); - } + check(ast, []); } ], @@ -399,6 +409,13 @@ PEG.Compiler = { } var replaceFunctions = { + grammar: + function(node, from, to) { + for (var name in node.rules) { + replace(node.rules[name], from, to); + } + }, + rule: replaceInExpression, choice: replaceInSubnodes("alternatives"), sequence: replaceInSubnodes("elements"), @@ -426,18 +443,16 @@ PEG.Compiler = { replaceFunctions[node.type](node, from, to); } - for (var rule in ast.rules) { - replace(ast.rules[rule], from, to); - } + replace(ast, from, to); } - for (var rule in ast.rules) { - if (isProxyRule(ast.rules[rule])) { - replaceRuleRefs(ast, ast.rules[rule].name, ast.rules[rule].expression.name); - if (rule === ast.startRule) { - ast.startRule = ast.rules[rule].expression.name; + for (var name in ast.rules) { + if (isProxyRule(ast.rules[name])) { + replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name); + if (name === ast.startRule) { + ast.startRule = 
ast.rules[name].expression.name; } - delete ast.rules[rule]; + delete ast.rules[name]; } } @@ -446,6 +461,203 @@ PEG.Compiler = { ], _compileFunctions: { + grammar: function(node) { + var initializerCode = node.initializer !== null + ? PEG.Compiler.compileNode(node.initializer) + : ""; + + var parseFunctionDefinitions = []; + for (var name in node.rules) { + parseFunctionDefinitions.push(PEG.Compiler.compileNode(node.rules[name])); + } + + return PEG.Compiler.formatCode( + "(function(){", + " /* Generated by PEG.js (http://pegjs.majda.cz/). */", + " ", + " var result = {", + " /*", + " * Parses the input with a generated parser. If the parsing is successful,", + " * returns a value explicitly or implicitly specified by the grammar from", + " * which the parser was generated (see |PEG.buildParser|). If the parsing is", + " * unsuccessful, throws |PEG.grammarParser.SyntaxError| describing the error.", + " */", + " parse: function(input) {", + " var pos = 0;", + " var rightmostMatchFailuresPos = 0;", + " var rightmostMatchFailuresExpected = [];", + " var cache = {};", + " ", + /* This needs to be in sync with PEG.StringUtils.quote. */ + " function quoteString(s) {", + " /*", + " * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a", + " * string literal except for the closing quote character, backslash,", + " * carriage return, line separator, paragraph separator, and line feed.", + " * Any character may appear in the form of an escape sequence.", + " */", + " return '\"' + s", + " .replace(/\\\\/g, '\\\\\\\\') // backslash", + " .replace(/\"/g, '\\\\\"') // closing quote character", + " .replace(/\\r/g, '\\\\r') // carriage return", + " .replace(/\\u2028/g, '\\\\u2028') // line separator", + " .replace(/\\u2029/g, '\\\\u2029') // paragraph separator", + " .replace(/\\n/g, '\\\\n') // line feed", + " + '\"';", + " }", + " ", + /* This needs to be in sync with PEG.ArrayUtils.contains. 
*/ + " function arrayContains(array, value) {", + " /*", + " * Stupid IE does not have Array.prototype.indexOf, otherwise this", + " * function would be a one-liner.", + " */", + " var length = array.length;", + " for (var i = 0; i < length; i++) {", + " if (array[i] === value) {", + " return true;", + " }", + " }", + " return false;", + " }", + " ", + " function matchFailed(failure) {", + " if (pos < rightmostMatchFailuresPos) {", + " return;", + " }", + " ", + " if (pos > rightmostMatchFailuresPos) {", + " rightmostMatchFailuresPos = pos;", + " rightmostMatchFailuresExpected = [];", + " }", + " ", + " if (!arrayContains(rightmostMatchFailuresExpected, failure)) {", + " rightmostMatchFailuresExpected.push(failure);", + " }", + " }", + " ", + " ${parseFunctionDefinitions}", + " ", + " function buildErrorMessage() {", + " function buildExpected(failuresExpected) {", + " switch (failuresExpected.length) {", + " case 0:", + " return 'end of input';", + " case 1:", + " return failuresExpected[0];", + " default:", + " failuresExpected.sort();", + " return failuresExpected.slice(0, failuresExpected.length - 1).join(', ')", + " + ' or '", + " + failuresExpected[failuresExpected.length - 1];", + " }", + " }", + " ", + " var expected = buildExpected(rightmostMatchFailuresExpected);", + " var actualPos = Math.max(pos, rightmostMatchFailuresPos);", + " var actual = actualPos < input.length", + " ? quoteString(input.charAt(actualPos))", + " : 'end of input';", + " ", + " return 'Expected ' + expected + ' but ' + actual + ' found.';", + " }", + " ", + " function computeErrorPosition() {", + " /*", + " * The first idea was to use |String.split| to break the input up to the", + " * error position along newlines and derive the line and column from", + " * there. 
However IE's |split| implementation is so broken that it was", + " * enough to prevent it.", + " */", + " ", + " var line = 1;", + " var column = 1;", + " var seenCR = false;", + " ", + " for (var i = 0; i < rightmostMatchFailuresPos; i++) {", + " var ch = input.charAt(i);", + " if (ch === '\\n') {", + " if (!seenCR) { line++; }", + " column = 1;", + " seenCR = false;", + " } else if (ch === '\\r' || ch === '\\u2028' || ch === '\\u2029') {", + " line++;", + " column = 1;", + " seenCR = true;", + " } else {", + " column++;", + " seenCR = false;", + " }", + " }", + " ", + " return { line: line, column: column };", + " }", + " ", + " ${initializerCode}", + " ", + " var result = parse_${startRule}({ reportMatchFailures: true });", + " ", + " /*", + " * The parser is now in one of the following three states:", + " *", + " * 1. The parser successfully parsed the whole input.", + " *", + " * - |result !== null|", + " * - |pos === input.length|", + " * - |rightmostMatchFailuresExpected| may or may not contain something", + " *", + " * 2. The parser successfully parsed only a part of the input.", + " *", + " * - |result !== null|", + " * - |pos < input.length|", + " * - |rightmostMatchFailuresExpected| may or may not contain something", + " *", + " * 3. The parser did not successfully parse any part of the input.", + " *", + " * - |result === null|", + " * - |pos === 0|", + " * - |rightmostMatchFailuresExpected| contains at least one failure", + " *", + " * All code following this comment (including called functions) must", + " * handle these states.", + " */", + " if (result === null || pos !== input.length) {", + " var errorPosition = computeErrorPosition();", + " throw new this.SyntaxError(", + " buildErrorMessage(),", + " errorPosition.line,", + " errorPosition.column", + " );", + " }", + " ", + " return result;", + " },", + " ", + " /* Returns the parser source code. 
*/", + " toSource: function() { return this._source; }", + " };", + " ", + " /* Thrown when a parser encounters a syntax error. */", + " ", + " result.SyntaxError = function(message, line, column) {", + " this.name = 'SyntaxError';", + " this.message = message;", + " this.line = line;", + " this.column = column;", + " };", + " ", + " result.SyntaxError.prototype = Error.prototype;", + " ", + " return result;", + "})()", + { + initializerCode: initializerCode, + parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"), + startRule: node.startRule + } + ); + }, + initializer: function(node) { return node.code; }, @@ -870,203 +1082,10 @@ PEG.Compiler = { ast = this._passes[i](ast); } - var initializerCode = ast.initializer !== null - ? this.compileNode(ast.initializer) - : ""; - - var parseFunctionDefinitions = []; - for (var rule in ast.rules) { - parseFunctionDefinitions.push(this.compileNode(ast.rules[rule])); - } - - var source = this.formatCode( - "(function(){", - " /* Generated by PEG.js (http://pegjs.majda.cz/). */", - " ", - " var result = {", - " /*", - " * Parses the input with a generated parser. If the parsing is successfull,", - " * returns a value explicitly or implicitly specified by the grammar from", - " * which the parser was generated (see |PEG.buildParser|). If the parsing is", - " * unsuccessful, throws |PEG.grammarParser.SyntaxError| describing the error.", - " */", - " parse: function(input) {", - " var pos = 0;", - " var rightmostMatchFailuresPos = 0;", - " var rightmostMatchFailuresExpected = [];", - " var cache = {};", - " ", - /* This needs to be in sync with PEG.StringUtils.quote. 
*/ - " function quoteString(s) {", - " /*", - " * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a", - " * string literal except for the closing quote character, backslash,", - " * carriage return, line separator, paragraph separator, and line feed.", - " * Any character may appear in the form of an escape sequence.", - " */", - " return '\"' + s", - " .replace(/\\\\/g, '\\\\\\\\') // backslash", - " .replace(/\"/g, '\\\\\"') // closing quote character", - " .replace(/\\r/g, '\\\\r') // carriage return", - " .replace(/\\u2028/g, '\\\\u2028') // line separator", - " .replace(/\\u2029/g, '\\\\u2029') // paragraph separator", - " .replace(/\\n/g, '\\\\n') // line feed", - " + '\"';", - " }", - " ", - /* This needs to be in sync with PEG.ArrayUtils.contains. */ - " function arrayContains(array, value) {", - " /*", - " * Stupid IE does not have Array.prototype.indexOf, otherwise this", - " * function would be a one-liner.", - " */", - " var length = array.length;", - " for (var i = 0; i < length; i++) {", - " if (array[i] === value) {", - " return true;", - " }", - " }", - " return false;", - " }", - " ", - " function matchFailed(failure) {", - " if (pos < rightmostMatchFailuresPos) {", - " return;", - " }", - " ", - " if (pos > rightmostMatchFailuresPos) {", - " rightmostMatchFailuresPos = pos;", - " rightmostMatchFailuresExpected = [];", - " }", - " ", - " if (!arrayContains(rightmostMatchFailuresExpected, failure)) {", - " rightmostMatchFailuresExpected.push(failure);", - " }", - " }", - " ", - " ${parseFunctionDefinitions}", - " ", - " function buildErrorMessage() {", - " function buildExpected(failuresExpected) {", - " switch (failuresExpected.length) {", - " case 0:", - " return 'end of input';", - " case 1:", - " return failuresExpected[0];", - " default:", - " failuresExpected.sort();", - " return failuresExpected.slice(0, failuresExpected.length - 1).join(', ')", - " + ' or '", - " + failuresExpected[failuresExpected.length - 1];", - " }", - " 
}", - " ", - " var expected = buildExpected(rightmostMatchFailuresExpected);", - " var actualPos = Math.max(pos, rightmostMatchFailuresPos);", - " var actual = actualPos < input.length", - " ? quoteString(input.charAt(actualPos))", - " : 'end of input';", - " ", - " return 'Expected ' + expected + ' but ' + actual + ' found.';", - " }", - " ", - " function computeErrorPosition() {", - " /*", - " * The first idea was to use |String.split| to break the input up to the", - " * error position along newlines and derive the line and column from", - " * there. However IE's |split| implementation is so broken that it was", - " * enough to prevent it.", - " */", - " ", - " var line = 1;", - " var column = 1;", - " var seenCR = false;", - " ", - " for (var i = 0; i < rightmostMatchFailuresPos; i++) {", - " var ch = input.charAt(i);", - " if (ch === '\\n') {", - " if (!seenCR) { line++; }", - " column = 1;", - " seenCR = false;", - " } else if (ch === '\\r' | ch === '\\u2028' || ch === '\\u2029') {", - " line++;", - " column = 1;", - " seenCR = true;", - " } else {", - " column++;", - " seenCR = false;", - " }", - " }", - " ", - " return { line: line, column: column };", - " }", - " ", - " ${initializerCode}", - " ", - " var result = parse_${startRule}({ reportMatchFailures: true });", - " ", - " /*", - " * The parser is now in one of the following three states:", - " *", - " * 1. The parser successfully parsed the whole input.", - " *", - " * - |result !== null|", - " * - |pos === input.length|", - " * - |rightmostMatchFailuresExpected| may or may not contain something", - " *", - " * 2. The parser successfully parsed only a part of the input.", - " *", - " * - |result !== null|", - " * - |pos < input.length|", - " * - |rightmostMatchFailuresExpected| may or may not contain something", - " *", - " * 3. 
The parser did not successfully parse any part of the input.", - " *", - " * - |result === null|", - " * - |pos === 0|", - " * - |rightmostMatchFailuresExpected| contains at least one failure", - " *", - " * All code following this comment (including called functions) must", - " * handle these states.", - " */", - " if (result === null || pos !== input.length) {", - " var errorPosition = computeErrorPosition();", - " throw new this.SyntaxError(", - " buildErrorMessage(),", - " errorPosition.line,", - " errorPosition.column", - " );", - " }", - " ", - " return result;", - " },", - " ", - " /* Returns the parser source code. */", - " toSource: function() { return this._source; }", - " };", - " ", - " /* Thrown when a parser encounters a syntax error. */", - " ", - " result.SyntaxError = function(message, line, column) {", - " this.name = 'SyntaxError';", - " this.message = message;", - " this.line = line;", - " this.column = column;", - " };", - " ", - " result.SyntaxError.prototype = Error.prototype;", - " ", - " return result;", - "})()", - { - initializerCode: initializerCode, - parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"), - startRule: ast.startRule - } - ); - + var source = this.compileNode(ast); var result = eval(source); result._source = source; + return result; } }; diff --git a/lib/metagrammar.js b/lib/metagrammar.js index 569978f..d23fbbb 100644 --- a/lib/metagrammar.js +++ b/lib/metagrammar.js @@ -105,6 +105,7 @@ PEG.grammarParser = (function(){ PEG.ArrayUtils.each(rules, function(rule) { rulesConverted[rule.name] = rule; }); return { + type: "grammar", initializer: initializer !== "" ? 
initializer : null, rules: rulesConverted, startRule: rules[0].name diff --git a/lib/metagrammar.pegjs b/lib/metagrammar.pegjs index 6878e4d..fe85187 100644 --- a/lib/metagrammar.pegjs +++ b/lib/metagrammar.pegjs @@ -4,6 +4,7 @@ grammar PEG.ArrayUtils.each(rules, function(rule) { rulesConverted[rule.name] = rule; }); return { + type: "grammar", initializer: initializer !== "" ? initializer : null, rules: rulesConverted, startRule: rules[0].name diff --git a/test/metagrammar-test.js b/test/metagrammar-test.js index 5420f6e..13a9bb1 100644 --- a/test/metagrammar-test.js +++ b/test/metagrammar-test.js @@ -127,6 +127,7 @@ var choiceLiterals = choice([literalAbcd, literalEfgh, literalIjkl]); function oneRuleGrammar(expression) { return { + type: "grammar", initializer: null, rules: { start: rule("start", null, expression) }, startRule: "start" @@ -156,6 +157,7 @@ function actionGrammar(action) { } var initializerGrammar = { + type: "grammar", initializer: initializer(" code "), rules: { a: rule("a", null, literalAbcd), @@ -168,6 +170,7 @@ test("parses grammar", function() { grammarParserParses( 'a = "abcd"', { + type: "grammar", initializer: null, rules: { a: rule("a", null, literalAbcd) }, startRule: "a" @@ -177,6 +180,7 @@ test("parses grammar", function() { grammarParserParses( 'a = "abcd"; b = "efgh"; c = "ijkl"', { + type: "grammar", initializer: null, rules: { a: rule("a", null, literalAbcd), @@ -203,6 +207,7 @@ test("parses rule", function() { grammarParserParses( 'start "start rule" = "abcd" / "efgh" / "ijkl"', { + type: "grammar", initializer: null, rules: { start: rule("start", "start rule", choiceLiterals) }, startRule: "start"