Add ability to start parsing from any grammar rule

Calling the parsing function could have been done without the ugly table
using |eval|, but this seemed to degrade performance significantly (by
about 3 %). This is probably because engines optimize badly in the presence
of |eval|.

The method used in this patch does not change the benchmark suite execution
speed in a statistically significant way on V8.

Detailed results (benchmark suite totals):

---------------------------------
 Test #     Before       After
---------------------------------
      1   38.24 kB/s   38.28 kB/s
      2   38.35 kB/s   38.15 kB/s
      3   38.43 kB/s   38.40 kB/s
      4   38.53 kB/s   38.20 kB/s
      5   38.25 kB/s   38.39 kB/s
---------------------------------
Average   38.36 kB/s   38.39 kB/s
---------------------------------

Browser used for the measurements:
Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.151 Safari/534.16
2011-03-29 15:40:34 +02:00 · 2011-03-29 15:40:34 +02:00 · cc7ad9739f
parent 1e57bf778d
commit cc7ad9739f
4 changed files with 115 additions and 4 deletions
--- a/src/emitter.js
+++ b/src/emitter.js
@ -96,6 +96,12 @@ PEG.compiler.emitter = function(ast) {
        ? emit(node.initializer)
        : "";

+      var parseFunctionTableItems = [];
+      for (var name in node.rules) {
+        parseFunctionTableItems.push(name + ": parse_" + name);
+      }
+      parseFunctionTableItems.sort();
+
      var parseFunctionDefinitions = [];
      for (var name in node.rules) {
        parseFunctionDefinitions.push(emit(node.rules[name]));
@ -112,7 +118,19 @@ PEG.compiler.emitter = function(ast) {
        "     * which the parser was generated (see |PEG.buildParser|). If the parsing is",
        "     * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.",
        "     */",
-        "    parse: function(input) {",
+        "    parse: function(input, startRule) {",
+        "      var parseFunctions = {",
+        "        ${parseFunctionTableItems}",
+        "      };",
+        "      ",
+        "      if (startRule !== undefined) {",
+        "        if (parseFunctions[startRule] === undefined) {",
+        "          throw new Error(\"Invalid rule name: \" + quote(startRule) + \".\");",
+        "        }",
+        "      } else {",
+        "        startRule = ${startRule|string};",
+        "      }",
+        "      ",
        "      var pos = 0;",
        "      var reportMatchFailures = true;",
        "      var rightmostMatchFailuresPos = 0;",
@ -245,7 +263,7 @@ PEG.compiler.emitter = function(ast) {
        "      ",
        "      ${initializerCode}",
        "      ",
-        "      var result = parse_${startRule}();",
+        "      var result = parseFunctions[startRule]();",
        "      ",
        "      /*",
        "       * The parser is now in one of the following three states:",
@ -302,6 +320,7 @@ PEG.compiler.emitter = function(ast) {
        "})()",
        {
          initializerCode:          initializerCode,
+          parseFunctionTableItems:  parseFunctionTableItems.join(",\n"),
          parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
          startRule:                node.startRule
        }
--- a/src/parser.js
+++ b/src/parser.js
@ -8,7 +8,73 @@ PEG.parser = (function(){
     * which the parser was generated (see |PEG.buildParser|). If the parsing is
     * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.
     */
-    parse: function(input) {
+    parse: function(input, startRule) {
+      var parseFunctions = {
+        __: parse___,
+        action: parse_action,
+        and: parse_and,
+        braced: parse_braced,
+        bracketDelimitedCharacter: parse_bracketDelimitedCharacter,
+        choice: parse_choice,
+        class: parse_class,
+        classCharacter: parse_classCharacter,
+        classCharacterRange: parse_classCharacterRange,
+        colon: parse_colon,
+        comment: parse_comment,
+        digit: parse_digit,
+        dot: parse_dot,
+        doubleQuotedCharacter: parse_doubleQuotedCharacter,
+        doubleQuotedLiteral: parse_doubleQuotedLiteral,
+        eol: parse_eol,
+        eolChar: parse_eolChar,
+        eolEscapeSequence: parse_eolEscapeSequence,
+        equals: parse_equals,
+        grammar: parse_grammar,
+        hexDigit: parse_hexDigit,
+        hexEscapeSequence: parse_hexEscapeSequence,
+        identifier: parse_identifier,
+        initializer: parse_initializer,
+        labeled: parse_labeled,
+        letter: parse_letter,
+        literal: parse_literal,
+        lowerCaseLetter: parse_lowerCaseLetter,
+        lparen: parse_lparen,
+        multiLineComment: parse_multiLineComment,
+        nonBraceCharacter: parse_nonBraceCharacter,
+        nonBraceCharacters: parse_nonBraceCharacters,
+        not: parse_not,
+        plus: parse_plus,
+        prefixed: parse_prefixed,
+        primary: parse_primary,
+        question: parse_question,
+        rparen: parse_rparen,
+        rule: parse_rule,
+        semicolon: parse_semicolon,
+        sequence: parse_sequence,
+        simpleBracketDelimitedCharacter: parse_simpleBracketDelimitedCharacter,
+        simpleDoubleQuotedCharacter: parse_simpleDoubleQuotedCharacter,
+        simpleEscapeSequence: parse_simpleEscapeSequence,
+        simpleSingleQuotedCharacter: parse_simpleSingleQuotedCharacter,
+        singleLineComment: parse_singleLineComment,
+        singleQuotedCharacter: parse_singleQuotedCharacter,
+        singleQuotedLiteral: parse_singleQuotedLiteral,
+        slash: parse_slash,
+        star: parse_star,
+        suffixed: parse_suffixed,
+        unicodeEscapeSequence: parse_unicodeEscapeSequence,
+        upperCaseLetter: parse_upperCaseLetter,
+        whitespace: parse_whitespace,
+        zeroEscapeSequence: parse_zeroEscapeSequence
+      };
+      
+      if (startRule !== undefined) {
+        if (parseFunctions[startRule] === undefined) {
+          throw new Error("Invalid rule name: " + quote(startRule) + ".");
+        }
+      } else {
+        startRule = "grammar";
+      }
+      
      var pos = 0;
      var reportMatchFailures = true;
      var rightmostMatchFailuresPos = 0;
@ -3516,7 +3582,7 @@ PEG.parser = (function(){
      
      
      
-      var result = parse_grammar();
+      var result = parseFunctions[startRule]();
      
      /*
       * The parser is now in one of the following three states:
--- a/test/compiler-test.js
+++ b/test/compiler-test.js
@ -395,6 +395,28 @@ test("error positions", function() {
  doesNotParseWithPos(parser, "1\u2029x", 2, 1); // paragraph separator
 });

+test("start rule", function() {
+  var parser = PEG.buildParser([
+    'a = .* { return "alpha"; }',
+    'b = .* { return "beta"; }'
+  ].join("\n"));
+
+  /* Default start rule = the first one */
+  parses(parser, "whatever", "alpha");
+
+  /* Explicit specification of the start rule */
+  parsesWithStartRule(parser, "whatever", "a", "alpha");
+  parsesWithStartRule(parser, "whatever", "b", "beta");
+
+  /* Invalid rule name */
+  raises(
+    function() { parser.parse("whatever", "c") },
+    function(e) {
+      return e instanceof Error && e.message === "Invalid rule name: \"c\".";
+    }
+  );
+});
+
 /*
 * Following examples are from Wikipedia, see
 * http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938.
--- a/test/helpers.js
+++ b/test/helpers.js
@ -2,6 +2,10 @@ parses = function(parser, input, expected) {
  deepEqual(parser.parse(input), expected);
 };

+parsesWithStartRule = function(parser, input, startRule, expected) {
+  deepEqual(parser.parse(input, startRule), expected);
+};
+
 /*
  * Checks with |raises| that calling |parser.parse| on |input| throws, passing
  * |parser.SyntaxError| as the expected error.
  */
 doesNotParse = function(parser, input) {
  raises(function() { parser.parse(input); }, parser.SyntaxError);
 };