Treat the whole grammar as an AST node

14 years ago · 4895f4f8e4
parent 917cf1cf2a
commit 4895f4f8e4
4 changed files with 236 additions and 210 deletions
--- a/lib/compiler.js
+++ b/lib/compiler.js
@ -275,6 +275,13 @@ PEG.Compiler = {
      }

      var checkFunctions = {
+        grammar:
+          /* Runs the check on every rule stored in the grammar node. */
+          function(node) {
+            var rules = node.rules;
+            for (var ruleName in rules) {
+              check(rules[ruleName]);
+            }
+          },
+
        rule:          checkExpression,
        choice:        checkSubnodes("alternatives"),
        sequence:      checkSubnodes("elements"),
@ -302,9 +309,7 @@ PEG.Compiler = {

      function check(node) { checkFunctions[node.type](node); }

-      for (var rule in ast.rules) {
-        check(ast.rules[rule]);
-      }
+      check(ast);
    },

    /* Checks that no left recursion is present. */
@ -316,6 +321,13 @@ PEG.Compiler = {
      }

      var checkFunctions = {
+        grammar:
+          /*
+           * Checks every rule of the grammar node, handing |appliedRules| on to
+           * each rule unchanged.
+           *
+           * The rules are read from |node| (the argument) rather than from the
+           * enclosing |ast| variable, so the handler depends only on the node it
+           * was dispatched on.
+           */
+          function(node, appliedRules) {
+            for (var name in node.rules) {
+              check(node.rules[name], appliedRules);
+            }
+          },
+
        rule:
          function(node, appliedRules) {
            check(node.expression, appliedRules.concat(node.name));
@ -362,9 +374,7 @@ PEG.Compiler = {
        checkFunctions[node.type](node, appliedRules);
      }

-      for (var rule in ast.rules) {
-        check(ast.rules[rule], []);
-      }
+      check(ast, []);
    }
  ],

@ -399,6 +409,13 @@ PEG.Compiler = {
        }

        var replaceFunctions = {
+          grammar:
+            /*
+             * Applies the |from| -> |to| replacement to every rule of the grammar
+             * node.
+             *
+             * The rules are read from |node| (the argument) rather than from the
+             * enclosing |ast| variable, so the handler depends only on the node it
+             * was dispatched on.
+             */
+            function(node, from, to) {
+              for (var name in node.rules) {
+                replace(node.rules[name], from, to);
+              }
+            },
+
          rule:          replaceInExpression,
          choice:        replaceInSubnodes("alternatives"),
          sequence:      replaceInSubnodes("elements"),
@ -426,18 +443,16 @@ PEG.Compiler = {
          replaceFunctions[node.type](node, from, to);
        }

-        for (var rule in ast.rules) {
-          replace(ast.rules[rule], from, to);
-        }
+        replace(ast, from, to);
      }

-      for (var rule in ast.rules) {
-        if (isProxyRule(ast.rules[rule])) {
-          replaceRuleRefs(ast, ast.rules[rule].name, ast.rules[rule].expression.name);
-          if (rule === ast.startRule) {
-            ast.startRule = ast.rules[rule].expression.name;
+      for (var name in ast.rules) {
+        if (isProxyRule(ast.rules[name])) {
+          replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name);
+          if (name === ast.startRule) {
+            ast.startRule = ast.rules[name].expression.name;
          }
-          delete ast.rules[rule];
+          delete ast.rules[name];
        }
      }

@ -446,6 +461,203 @@ PEG.Compiler = {
  ],

  _compileFunctions: {
+    /*
+     * Compiles a grammar node into the source code of a parser.
+     *
+     * The initializer (when |node.initializer| is not null) and every rule in
+     * |node.rules| are compiled with |PEG.Compiler.compileNode|. The results,
+     * together with |node.startRule|, are substituted into the template below
+     * by |PEG.Compiler.formatCode| (placeholders |${initializerCode}|,
+     * |${parseFunctionDefinitions}| and |${startRule}|).
+     *
+     * The template is an immediately-invoked function expression that builds
+     * and returns an object with |parse|, |toSource| and |SyntaxError| members.
+     *
+     * Returns whatever |PEG.Compiler.formatCode| returns for the template.
+     */
+    grammar: function(node) {
+      var initializerCode = node.initializer !== null
+        ?  PEG.Compiler.compileNode(node.initializer)
+        : "";
+
+      var parseFunctionDefinitions = [];
+      for (var name in node.rules) {
+        parseFunctionDefinitions.push(PEG.Compiler.compileNode(node.rules[name]));
+      }
+
+      return PEG.Compiler.formatCode(
+        "(function(){",
+        "  /* Generated by PEG.js (http://pegjs.majda.cz/). */",
+        "  ",
+        "  var result = {",
+        "    /*",
+        "     * Parses the input with a generated parser. If the parsing is successfull,",
+        "     * returns a value explicitly or implicitly specified by the grammar from",
+        "     * which the parser was generated (see |PEG.buildParser|). If the parsing is",
+        "     * unsuccessful, throws |PEG.grammarParser.SyntaxError| describing the error.",
+        "     */",
+        "    parse: function(input) {",
+        "      var pos = 0;",
+        "      var rightmostMatchFailuresPos = 0;",
+        "      var rightmostMatchFailuresExpected = [];",
+        "      var cache = {};",
+        "      ",
+        /* This needs to be in sync with PEG.StringUtils.quote. */
+        "      function quoteString(s) {",
+        "        /*",
+        "         * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a",
+        "         * string literal except for the closing quote character, backslash,",
+        "         * carriage return, line separator, paragraph separator, and line feed.",
+        "         * Any character may appear in the form of an escape sequence.",
+        "         */",
+        "        return '\"' + s",
+        "          .replace(/\\\\/g, '\\\\\\\\')        // backslash",
+        "          .replace(/\"/g, '\\\\\"')          // closing quote character",
+        "          .replace(/\\r/g, '\\\\r')         // carriage return",
+        "          .replace(/\\u2028/g, '\\\\u2028') // line separator",
+        "          .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
+        "          .replace(/\\n/g, '\\\\n')         // line feed",
+        "          + '\"';",
+        "      }",
+        "      ",
+        /* This needs to be in sync with PEG.ArrayUtils.contains. */
+        "      function arrayContains(array, value) {",
+        "        /*",
+        "         * Stupid IE does not have Array.prototype.indexOf, otherwise this",
+        "         * function would be a one-liner.",
+        "         */",
+        "        var length = array.length;",
+        "        for (var i = 0; i < length; i++) {",
+        "          if (array[i] === value) {",
+        "            return true;",
+        "          }",
+        "        }",
+        "        return false;",
+        "      }",
+        "      ",
+        "      function matchFailed(failure) {",
+        "        if (pos < rightmostMatchFailuresPos) {",
+        "          return;",
+        "        }",
+        "        ",
+        "        if (pos > rightmostMatchFailuresPos) {",
+        "          rightmostMatchFailuresPos = pos;",
+        "          rightmostMatchFailuresExpected = [];",
+        "        }",
+        "        ",
+        "        if (!arrayContains(rightmostMatchFailuresExpected, failure)) {",
+        "          rightmostMatchFailuresExpected.push(failure);",
+        "        }",
+        "      }",
+        "      ",
+        "      ${parseFunctionDefinitions}",
+        "      ",
+        "      function buildErrorMessage() {",
+        "        function buildExpected(failuresExpected) {",
+        "          switch (failuresExpected.length) {",
+        "            case 0:",
+        "              return 'end of input';",
+        "            case 1:",
+        "              return failuresExpected[0];",
+        "            default:",
+        "              failuresExpected.sort();",
+        "              return failuresExpected.slice(0, failuresExpected.length - 1).join(', ')",
+        "                + ' or '",
+        "                + failuresExpected[failuresExpected.length - 1];",
+        "          }",
+        "        }",
+        "        ",
+        "        var expected = buildExpected(rightmostMatchFailuresExpected);",
+        "        var actualPos = Math.max(pos, rightmostMatchFailuresPos);",
+        "        var actual = actualPos < input.length",
+        "          ? quoteString(input.charAt(actualPos))",
+        "          : 'end of input';",
+        "        ",
+        "        return 'Expected ' + expected + ' but ' + actual + ' found.';",
+        "      }",
+        "      ",
+        "      function computeErrorPosition() {",
+        "        /*",
+        "         * The first idea was to use |String.split| to break the input up to the",
+        "         * error position along newlines and derive the line and column from",
+        "         * there. However IE's |split| implementation is so broken that it was",
+        "         * enough to prevent it.",
+        "         */",
+        "        ",
+        "        var line = 1;",
+        "        var column = 1;",
+        "        var seenCR = false;",
+        "        ",
+        "        for (var i = 0; i <  rightmostMatchFailuresPos; i++) {",
+        "          var ch = input.charAt(i);",
+        "          if (ch === '\\n') {",
+        "            if (!seenCR) { line++; }",
+        "            column = 1;",
+        "            seenCR = false;",
+        "          } else if (ch === '\\r' || ch === '\\u2028' || ch === '\\u2029') {",
+        "            line++;",
+        "            column = 1;",
+        "            seenCR = true;",
+        "          } else {",
+        "            column++;",
+        "            seenCR = false;",
+        "          }",
+        "        }",
+        "        ",
+        "        return { line: line, column: column };",
+        "      }",
+        "      ",
+        "      ${initializerCode}",
+        "      ",
+        "      var result = parse_${startRule}({ reportMatchFailures: true });",
+        "      ",
+        "      /*",
+        "       * The parser is now in one of the following three states:",
+        "       *",
+        "       * 1. The parser successfully parsed the whole input.",
+        "       *",
+        "       *    - |result !== null|",
+        "       *    - |pos === input.length|",
+        "       *    - |rightmostMatchFailuresExpected| may or may not contain something",
+        "       *",
+        "       * 2. The parser successfully parsed only a part of the input.",
+        "       *",
+        "       *    - |result !== null|",
+        "       *    - |pos < input.length|",
+        "       *    - |rightmostMatchFailuresExpected| may or may not contain something",
+        "       *",
+        "       * 3. The parser did not successfully parse any part of the input.",
+        "       *",
+        "       *   - |result === null|",
+        "       *   - |pos === 0|",
+        "       *   - |rightmostMatchFailuresExpected| contains at least one failure",
+        "       *",
+        "       * All code following this comment (including called functions) must",
+        "       * handle these states.",
+        "       */",
+        "      if (result === null || pos !== input.length) {",
+        "        var errorPosition = computeErrorPosition();",
+        "        throw new this.SyntaxError(",
+        "          buildErrorMessage(),",
+        "          errorPosition.line,",
+        "          errorPosition.column",
+        "        );",
+        "      }",
+        "      ",
+        "      return result;",
+        "    },",
+        "    ",
+        "    /* Returns the parser source code. */",
+        "    toSource: function() { return this._source; }",
+        "  };",
+        "  ",
+        "  /* Thrown when a parser encounters a syntax error. */",
+        "  ",
+        "  result.SyntaxError = function(message, line, column) {",
+        "    this.name = 'SyntaxError';",
+        "    this.message = message;",
+        "    this.line = line;",
+        "    this.column = column;",
+        "  };",
+        "  ",
+        "  result.SyntaxError.prototype = Error.prototype;",
+        "  ",
+        "  return result;",
+        "})()",
+        {
+          initializerCode:          initializerCode,
+          parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
+          startRule:                node.startRule
+        }
+      );
+    },
+
    initializer: function(node) {
      return node.code;
    },
@ -870,203 +1082,10 @@ PEG.Compiler = {
      ast = this._passes[i](ast);
    }

-    var initializerCode = ast.initializer !== null
-      ?  this.compileNode(ast.initializer)
-      : "";
-
-    var parseFunctionDefinitions = [];
-    for (var rule in ast.rules) {
-      parseFunctionDefinitions.push(this.compileNode(ast.rules[rule]));
-    }
-
-    var source = this.formatCode(
-      "(function(){",
-      "  /* Generated by PEG.js (http://pegjs.majda.cz/). */",
-      "  ",
-      "  var result = {",
-      "    /*",
-      "     * Parses the input with a generated parser. If the parsing is successfull,",
-      "     * returns a value explicitly or implicitly specified by the grammar from",
-      "     * which the parser was generated (see |PEG.buildParser|). If the parsing is",
-      "     * unsuccessful, throws |PEG.grammarParser.SyntaxError| describing the error.",
-      "     */",
-      "    parse: function(input) {",
-      "      var pos = 0;",
-      "      var rightmostMatchFailuresPos = 0;",
-      "      var rightmostMatchFailuresExpected = [];",
-      "      var cache = {};",
-      "      ",
-      /* This needs to be in sync with PEG.StringUtils.quote. */
-      "      function quoteString(s) {",
-      "        /*",
-      "         * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a",
-      "         * string literal except for the closing quote character, backslash,",
-      "         * carriage return, line separator, paragraph separator, and line feed.",
-      "         * Any character may appear in the form of an escape sequence.",
-      "         */",
-      "        return '\"' + s",
-      "          .replace(/\\\\/g, '\\\\\\\\')        // backslash",
-      "          .replace(/\"/g, '\\\\\"')          // closing quote character",
-      "          .replace(/\\r/g, '\\\\r')         // carriage return",
-      "          .replace(/\\u2028/g, '\\\\u2028') // line separator",
-      "          .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
-      "          .replace(/\\n/g, '\\\\n')         // line feed",
-      "          + '\"';",
-      "      }",
-      "      ",
-      /* This needs to be in sync with PEG.ArrayUtils.contains. */
-      "      function arrayContains(array, value) {",
-      "        /*",
-      "         * Stupid IE does not have Array.prototype.indexOf, otherwise this",
-      "         * function would be a one-liner.",
-      "         */",
-      "        var length = array.length;",
-      "        for (var i = 0; i < length; i++) {",
-      "          if (array[i] === value) {",
-      "            return true;",
-      "          }",
-      "        }",
-      "        return false;",
-      "      }",
-      "      ",
-      "      function matchFailed(failure) {",
-      "        if (pos < rightmostMatchFailuresPos) {",
-      "          return;",
-      "        }",
-      "        ",
-      "        if (pos > rightmostMatchFailuresPos) {",
-      "          rightmostMatchFailuresPos = pos;",
-      "          rightmostMatchFailuresExpected = [];",
-      "        }",
-      "        ",
-      "        if (!arrayContains(rightmostMatchFailuresExpected, failure)) {",
-      "          rightmostMatchFailuresExpected.push(failure);",
-      "        }",
-      "      }",
-      "      ",
-      "      ${parseFunctionDefinitions}",
-      "      ",
-      "      function buildErrorMessage() {",
-      "        function buildExpected(failuresExpected) {",
-      "          switch (failuresExpected.length) {",
-      "            case 0:",
-      "              return 'end of input';",
-      "            case 1:",
-      "              return failuresExpected[0];",
-      "            default:",
-      "              failuresExpected.sort();",
-      "              return failuresExpected.slice(0, failuresExpected.length - 1).join(', ')",
-      "                + ' or '",
-      "                + failuresExpected[failuresExpected.length - 1];",
-      "          }",
-      "        }",
-      "        ",
-      "        var expected = buildExpected(rightmostMatchFailuresExpected);",
-      "        var actualPos = Math.max(pos, rightmostMatchFailuresPos);",
-      "        var actual = actualPos < input.length",
-      "          ? quoteString(input.charAt(actualPos))",
-      "          : 'end of input';",
-      "        ",
-      "        return 'Expected ' + expected + ' but ' + actual + ' found.';",
-      "      }",
-      "      ",
-      "      function computeErrorPosition() {",
-      "        /*",
-      "         * The first idea was to use |String.split| to break the input up to the",
-      "         * error position along newlines and derive the line and column from",
-      "         * there. However IE's |split| implementation is so broken that it was",
-      "         * enough to prevent it.",
-      "         */",
-      "        ",
-      "        var line = 1;",
-      "        var column = 1;",
-      "        var seenCR = false;",
-      "        ",
-      "        for (var i = 0; i <  rightmostMatchFailuresPos; i++) {",
-      "          var ch = input.charAt(i);",
-      "          if (ch === '\\n') {",
-      "            if (!seenCR) { line++; }",
-      "            column = 1;",
-      "            seenCR = false;",
-      "          } else if (ch === '\\r' | ch === '\\u2028' || ch === '\\u2029') {",
-      "            line++;",
-      "            column = 1;",
-      "            seenCR = true;",
-      "          } else {",
-      "            column++;",
-      "            seenCR = false;",
-      "          }",
-      "        }",
-      "        ",
-      "        return { line: line, column: column };",
-      "      }",
-      "      ",
-      "      ${initializerCode}",
-      "      ",
-      "      var result = parse_${startRule}({ reportMatchFailures: true });",
-      "      ",
-      "      /*",
-      "       * The parser is now in one of the following three states:",
-      "       *",
-      "       * 1. The parser successfully parsed the whole input.",
-      "       *",
-      "       *    - |result !== null|",
-      "       *    - |pos === input.length|",
-      "       *    - |rightmostMatchFailuresExpected| may or may not contain something",
-      "       *",
-      "       * 2. The parser successfully parsed only a part of the input.",
-      "       *",
-      "       *    - |result !== null|",
-      "       *    - |pos < input.length|",
-      "       *    - |rightmostMatchFailuresExpected| may or may not contain something",
-      "       *",
-      "       * 3. The parser did not successfully parse any part of the input.",
-      "       *",
-      "       *   - |result === null|",
-      "       *   - |pos === 0|",
-      "       *   - |rightmostMatchFailuresExpected| contains at least one failure",
-      "       *",
-      "       * All code following this comment (including called functions) must",
-      "       * handle these states.",
-      "       */",
-      "      if (result === null || pos !== input.length) {",
-      "        var errorPosition = computeErrorPosition();",
-      "        throw new this.SyntaxError(",
-      "          buildErrorMessage(),",
-      "          errorPosition.line,",
-      "          errorPosition.column",
-      "        );",
-      "      }",
-      "      ",
-      "      return result;",
-      "    },",
-      "    ",
-      "    /* Returns the parser source code. */",
-      "    toSource: function() { return this._source; }",
-      "  };",
-      "  ",
-      "  /* Thrown when a parser encounters a syntax error. */",
-      "  ",
-      "  result.SyntaxError = function(message, line, column) {",
-      "    this.name = 'SyntaxError';",
-      "    this.message = message;",
-      "    this.line = line;",
-      "    this.column = column;",
-      "  };",
-      "  ",
-      "  result.SyntaxError.prototype = Error.prototype;",
-      "  ",
-      "  return result;",
-      "})()",
-      {
-        initializerCode:          initializerCode,
-        parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
-        startRule:                ast.startRule
-      }
-    );
-
+    var source = this.compileNode(ast);
    var result = eval(source);
    result._source = source;
+
    return result;
  }
 };
--- a/lib/metagrammar.js
+++ b/lib/metagrammar.js
@ -105,6 +105,7 @@ PEG.grammarParser = (function(){
                PEG.ArrayUtils.each(rules, function(rule) { rulesConverted[rule.name] = rule; });
          
                return {
+                  type:        "grammar",
                  initializer: initializer !== "" ? initializer : null,
                  rules:       rulesConverted,
                  startRule:   rules[0].name
--- a/lib/metagrammar.pegjs
+++ b/lib/metagrammar.pegjs
@ -4,6 +4,7 @@ grammar
      PEG.ArrayUtils.each(rules, function(rule) { rulesConverted[rule.name] = rule; });

      return {
+        type:        "grammar",
        initializer: initializer !== "" ? initializer : null,
        rules:       rulesConverted,
        startRule:   rules[0].name
--- a/test/metagrammar-test.js
+++ b/test/metagrammar-test.js
@ -127,6 +127,7 @@ var choiceLiterals = choice([literalAbcd, literalEfgh, literalIjkl]);

 function oneRuleGrammar(expression) {
  return {
+    type:        "grammar",
    initializer: null,
    rules:       { start: rule("start", null, expression) },
    startRule:   "start"
@ -156,6 +157,7 @@ function actionGrammar(action) {
 }

 var initializerGrammar = {
+  type:        "grammar",
  initializer: initializer(" code "),
  rules: {
    a: rule("a", null, literalAbcd),
@ -168,6 +170,7 @@ test("parses grammar", function() {
  grammarParserParses(
    'a = "abcd"',
    {
+      type:        "grammar",
      initializer: null,
      rules:       { a: rule("a", null, literalAbcd) },
      startRule:   "a"
@ -177,6 +180,7 @@ test("parses grammar", function() {
  grammarParserParses(
    'a = "abcd"; b = "efgh"; c = "ijkl"',
    {
+      type:        "grammar",
      initializer: null,
      rules: {
        a: rule("a", null, literalAbcd),
@ -203,6 +207,7 @@ test("parses rule", function() {
  grammarParserParses(
    'start "start rule" = "abcd" / "efgh" / "ijkl"',
    {
+      type:        "grammar",
      initializer: null,
      rules:       { start: rule("start", "start rule", choiceLiterals) },
      startRule:   "start"