Allowed start rules must be specified explicitly

Before this commit, generated parser were able to start parsing from any rule. This was nice, but it made rule code inlining impossible. Since this commit, the list of allowed start rules has to be specified explicitly using the |allowedStartRules| option of the |PEG.buildParser| method (or the --allowed-start-rule option on the command-line). These rules will be excluded from inlining when it's implemented.
12 years ago · 208cc33930
parent 6a1ec7631f
commit 208cc33930
5 changed files with 76 additions and 78 deletions
--- a/README.md
+++ b/README.md
@ -55,10 +55,11 @@ If you omit both input and ouptut file, standard input and output are used.

 By default, the parser object is assigned to `module.exports`, which makes the output a Node.js module. You can assign it to another variable by passing a variable name using the `-e`/`--export-var` option. This may be helpful if you want to use the parser in browser environment.

-You can tweak the generated parser with two options:
+You can tweak the generated parser with several options:

  * `--cache` — makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower
  * `--track-line-and-column` — makes the parser track line and column (available as `line` and `column` variables in the actions and predicates)
+  * `--allowed-start-rules` — comma-separated list of rules the parser will be allowed to start parsing from (default: the first rule in the grammar)

 ### JavaScript API

@ -80,6 +81,7 @@ You can tweak the generated parser by passing a second parameter with an options

  * `cache` — if `true`, makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower (default: `false`)
  * `trackLineAndColumn` — if `true`, makes the parser track line and column (available as `line` and `column` variables in the actions and predicates) (default: `false`)
+  * `allowedStartRules` — rules the parser will be allowed to start parsing from (default: the first rule in the grammar)

 Using the Parser
 ----------------
--- a/bin/pegjs
+++ b/bin/pegjs
@ -21,12 +21,17 @@ function printHelp() {
  util.puts("omitted, standard input and output are used.");
  util.puts("");
  util.puts("Options:");
-  util.puts("  -e, --export-var <variable>  name of the variable where the parser object");
-  util.puts("                               will be stored (default: \"module.exports\")");
-  util.puts("      --cache                  make generated parser cache results");
-  util.puts("      --track-line-and-column  make generated parser track line and column");
-  util.puts("  -v, --version                print version information and exit");
-  util.puts("  -h, --help                   print help and exit");
+  util.puts("  -e, --export-var <variable>        name of the variable where the parser");
+  util.puts("                                     object will be stored (default:");
+  util.puts("                                     \"module.exports\")");
+  util.puts("      --cache                        make generated parser cache results");
+  util.puts("      --track-line-and-column        make generated parser track line and column");
+  util.puts("      --allowed-start-rules <rules>  comma-separated list of rules the generated");
+  util.puts("                                     parser will be allowed to start parsing");
+  util.puts("                                     from (default: the first rule in the");
+  util.puts("                                     grammar)");
+  util.puts("  -v, --version                      print version information and exit");
+  util.puts("  -h, --help                         print help and exit");
 }

 function exitSuccess() {
@ -90,6 +95,16 @@ while (args.length > 0 && isOption(args[0])) {
      options.trackLineAndColumn = true;
      break;

+    case "--allowed-start-rules":
+      nextArg();
+      if (args.length === 0) {
+        abort("Missing parameter of the -e/--allowed-start-rules option.");
+      }
+      options.allowedStartRules = args[0]
+        .split(",")
+        .map(function(s) { return s.trim() });
+      break;
+
    case "-v":
    case "--version":
      printVersion();
--- a/spec/generated-parser.spec.js
+++ b/spec/generated-parser.spec.js
@ -135,27 +135,28 @@ describe("generated parser", function() {
  describe("parse", function() {
    var parser = PEG.buildParser([
          'a = "x" { return "a"; }',
-          'b = "x" { return "b"; }'
-        ].join("\n"));
+          'b = "x" { return "b"; }',
+          'c = "x" { return "c"; }'
+        ].join("\n"), { allowedStartRules: ["b", "c"] });

    describe("start rule", function() {
      describe("without the |startRule| option", function() {
-        it("uses the first rule", function() {
-          expect(parser).toParse("x", "a");
+        it("uses the first allowed rule", function() {
+          expect(parser).toParse("x", "b");
        });
      });

-      describe("when the |startRule| option specifies existing rule", function() {
+      describe("when the |startRule| option specifies allowed rule", function() {
        it("uses the specified rule", function() {
-          expect(parser).toParse("x", { startRule: "a" }, "a");
          expect(parser).toParse("x", { startRule: "b" }, "b");
+          expect(parser).toParse("x", { startRule: "c" }, "c");
        });
      });

-      describe("when the |startRule| option specifies non-existent rule", function() {
+      describe("when the |startRule| option specifies disallowed rule", function() {
        it("throws exception", function() {
-          expect(parser).toFailToParse("x", { startRule: "c" }, {
-            message: "Invalid rule name: \"c\"."
+          expect(parser).toFailToParse("x", { startRule: "a" }, {
+            message: "Can't start parsing from rule \"a\"."
          });
        });
      });
@ -899,6 +900,40 @@ describe("generated parser", function() {
      });
    });

+    describe("allowed start rules", function() {
+      var grammar = [
+            'a = "x"',
+            'b = "x"',
+            'c = "x"'
+          ].join("\n");
+
+      describe("without the |allowedStartRules| option", function() {
+        var parser = PEG.buildParser(grammar);
+
+        it("allows the first rule", function() {
+          expect(parser).toParse("x", { startRule: "a" }, "x");
+        });
+
+        it("does not allow any other rules", function() {
+          expect(parser).toFailToParse("x", { startRule: "b" }, { });
+          expect(parser).toFailToParse("x", { startRule: "c" }, { });
+        });
+      });
+
+      describe("with the |allowedStartRules| option", function() {
+        var parser = PEG.buildParser(grammar, { allowedStartRules: ["b", "c"] });
+
+        it("allows the specified rules", function() {
+          expect(parser).toParse("x", { startRule: "b" }, "x");
+          expect(parser).toParse("x", { startRule: "c" }, "x");
+        });
+
+        it("does not allow any other rules", function() {
+          expect(parser).toFailToParse("x", { startRule: "a" }, { });
+        });
+      });
+    });
+
    /*
     * Following examples are from Wikipedia, see
     * http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938.
--- a/src/compiler/passes/generate-code.js
+++ b/src/compiler/passes/generate-code.js
@ -3,7 +3,8 @@ PEG.compiler.passes.generateCode = function(ast, options) {
  options = clone(options) || {};
  defaults(options, {
    cache:              false,
-    trackLineAndColumn: false
+    trackLineAndColumn: false,
+    allowedStartRules:  [ast.startRule]
  });

  /*
@ -305,8 +306,8 @@ PEG.compiler.passes.generateCode = function(ast, options) {
            '     */',
            '    parse: function(input) {',
            '      var parseFunctions = {',
-            '        #for rule in node.rules',
-            '          #{string(rule.name) + ": parse_" + rule.name + (rule !== node.rules[node.rules.length - 1] ? "," : "")}',
+            '        #for rule in options.allowedStartRules',
+            '          #{string(rule) + ": parse_" + rule + (rule !== options.allowedStartRules[options.allowedStartRules.length - 1] ? "," : "")}',
            '        #end',
            '      };',
            '      ',
@ -317,10 +318,10 @@ PEG.compiler.passes.generateCode = function(ast, options) {
            '        startRule = options.startRule;',
            '        ',
            '        if (parseFunctions[startRule] === undefined) {',
-            '          throw new Error("Invalid rule name: " + quote(startRule) + ".");',
+            '          throw new Error("Can\'t start parsing from rule " + quote(startRule) + ".");',
            '        }',
            '      } else {',
-            '        startRule = #{string(node.startRule)};',
+            '        startRule = #{string(options.allowedStartRules[0])};',
            '      }',
            '      ',
            '      #{posInit("pos")};',
--- a/src/parser.js
+++ b/src/parser.js
@ -37,62 +37,7 @@ PEG.parser = (function(){
     */
    parse: function(input) {
      var parseFunctions = {
-        "grammar": parse_grammar,
-        "initializer": parse_initializer,
-        "rule": parse_rule,
-        "choice": parse_choice,
-        "sequence": parse_sequence,
-        "labeled": parse_labeled,
-        "prefixed": parse_prefixed,
-        "suffixed": parse_suffixed,
-        "primary": parse_primary,
-        "action": parse_action,
-        "braced": parse_braced,
-        "nonBraceCharacters": parse_nonBraceCharacters,
-        "nonBraceCharacter": parse_nonBraceCharacter,
-        "equals": parse_equals,
-        "colon": parse_colon,
-        "semicolon": parse_semicolon,
-        "slash": parse_slash,
-        "and": parse_and,
-        "not": parse_not,
-        "question": parse_question,
-        "star": parse_star,
-        "plus": parse_plus,
-        "lparen": parse_lparen,
-        "rparen": parse_rparen,
-        "dot": parse_dot,
-        "identifier": parse_identifier,
-        "literal": parse_literal,
-        "string": parse_string,
-        "doubleQuotedString": parse_doubleQuotedString,
-        "doubleQuotedCharacter": parse_doubleQuotedCharacter,
-        "simpleDoubleQuotedCharacter": parse_simpleDoubleQuotedCharacter,
-        "singleQuotedString": parse_singleQuotedString,
-        "singleQuotedCharacter": parse_singleQuotedCharacter,
-        "simpleSingleQuotedCharacter": parse_simpleSingleQuotedCharacter,
-        "class": parse_class,
-        "classCharacterRange": parse_classCharacterRange,
-        "classCharacter": parse_classCharacter,
-        "bracketDelimitedCharacter": parse_bracketDelimitedCharacter,
-        "simpleBracketDelimitedCharacter": parse_simpleBracketDelimitedCharacter,
-        "simpleEscapeSequence": parse_simpleEscapeSequence,
-        "zeroEscapeSequence": parse_zeroEscapeSequence,
-        "hexEscapeSequence": parse_hexEscapeSequence,
-        "unicodeEscapeSequence": parse_unicodeEscapeSequence,
-        "eolEscapeSequence": parse_eolEscapeSequence,
-        "digit": parse_digit,
-        "hexDigit": parse_hexDigit,
-        "letter": parse_letter,
-        "lowerCaseLetter": parse_lowerCaseLetter,
-        "upperCaseLetter": parse_upperCaseLetter,
-        "__": parse___,
-        "comment": parse_comment,
-        "singleLineComment": parse_singleLineComment,
-        "multiLineComment": parse_multiLineComment,
-        "eol": parse_eol,
-        "eolChar": parse_eolChar,
-        "whitespace": parse_whitespace
+        "grammar": parse_grammar
      };
      
      var options = arguments.length > 1 ? arguments[1] : {},
@ -102,7 +47,7 @@ PEG.parser = (function(){
        startRule = options.startRule;
        
        if (parseFunctions[startRule] === undefined) {
-          throw new Error("Invalid rule name: " + quote(startRule) + ".");
+          throw new Error("Can't start parsing from rule " + quote(startRule) + ".");
        }
      } else {
        startRule = "grammar";