Add ability to start parsing from any grammar rule

Calling the parsing function could have been done without the ugly table by
using |eval|, but this seemed to degrade performance significantly (by about
3 %). This is probably because engines optimize badly in the presence of
|eval|.

The method used in this patch does not change the benchmark suite execution
speed in a statistically significant way on V8.

Detailed results (benchmark suite totals):

---------------------------------
 Test #     Before       After
---------------------------------
      1   38.24 kB/s   38.28 kB/s
      2   38.35 kB/s   38.15 kB/s
      3   38.43 kB/s   38.40 kB/s
      4   38.53 kB/s   38.20 kB/s
      5   38.25 kB/s   38.39 kB/s
---------------------------------
Average   38.36 kB/s   38.28 kB/s
---------------------------------

Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.151 Safari/534.1
2011-03-29 15:40:34 +02:00 · 2011-03-29 15:40:34 +02:00 · cc7ad9739f
parent 1e57bf778d
commit cc7ad9739f
4 changed files with 115 additions and 4 deletions
--- a/src/emitter.js
+++ b/src/emitter.js
@ -96,6 +96,12 @@ PEG.compiler.emitter = function(ast) {
        ? emit(node.initializer)
        : "";
      var parseFunctionTableItems = [];
      for (var name in node.rules) {
        parseFunctionTableItems.push(name + ": parse_" + name);
      }
      parseFunctionTableItems.sort();
      var parseFunctionDefinitions = [];
      for (var name in node.rules) {
        parseFunctionDefinitions.push(emit(node.rules[name]));
@ -112,7 +118,19 @@ PEG.compiler.emitter = function(ast) {
        "     * which the parser was generated (see |PEG.buildParser|). If the parsing is",
        "     * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.",
        "     */",
-        "    parse: function(input) {",
+        "    parse: function(input, startRule) {",
        "      var parseFunctions = {",
        "        ${parseFunctionTableItems}",
        "      };",
        "      ",
        "      if (startRule !== undefined) {",
        "        if (parseFunctions[startRule] === undefined) {",
        "          throw new Error(\"Invalid rule name: \" + quote(startRule) + \".\");",
        "        }",
        "      } else {",
        "        startRule = ${startRule|string};",
        "      }",
        "      ",
        "      var pos = 0;",
        "      var reportMatchFailures = true;",
        "      var rightmostMatchFailuresPos = 0;",
@ -245,7 +263,7 @@ PEG.compiler.emitter = function(ast) {
        "      ",
        "      ${initializerCode}",
        "      ",
-        "      var result = parse_${startRule}();",
+        "      var result = parseFunctions[startRule]();",
        "      ",
        "      /*",
        "       * The parser is now in one of the following three states:",
@ -302,6 +320,7 @@ PEG.compiler.emitter = function(ast) {
        "})()",
        {
          initializerCode:          initializerCode,
          parseFunctionTableItems:  parseFunctionTableItems.join(",\n"),
          parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
          startRule:                node.startRule
        }
--- a/src/parser.js
+++ b/src/parser.js
@ -8,7 +8,73 @@ PEG.parser = (function(){
     * which the parser was generated (see |PEG.buildParser|). If the parsing is
     * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.
     */
-    parse: function(input) {
+    parse: function(input, startRule) {
      var parseFunctions = {
        __: parse___,
        action: parse_action,
        and: parse_and,
        braced: parse_braced,
        bracketDelimitedCharacter: parse_bracketDelimitedCharacter,
        choice: parse_choice,
        class: parse_class,
        classCharacter: parse_classCharacter,
        classCharacterRange: parse_classCharacterRange,
        colon: parse_colon,
        comment: parse_comment,
        digit: parse_digit,
        dot: parse_dot,
        doubleQuotedCharacter: parse_doubleQuotedCharacter,
        doubleQuotedLiteral: parse_doubleQuotedLiteral,
        eol: parse_eol,
        eolChar: parse_eolChar,
        eolEscapeSequence: parse_eolEscapeSequence,
        equals: parse_equals,
        grammar: parse_grammar,
        hexDigit: parse_hexDigit,
        hexEscapeSequence: parse_hexEscapeSequence,
        identifier: parse_identifier,
        initializer: parse_initializer,
        labeled: parse_labeled,
        letter: parse_letter,
        literal: parse_literal,
        lowerCaseLetter: parse_lowerCaseLetter,
        lparen: parse_lparen,
        multiLineComment: parse_multiLineComment,
        nonBraceCharacter: parse_nonBraceCharacter,
        nonBraceCharacters: parse_nonBraceCharacters,
        not: parse_not,
        plus: parse_plus,
        prefixed: parse_prefixed,
        primary: parse_primary,
        question: parse_question,
        rparen: parse_rparen,
        rule: parse_rule,
        semicolon: parse_semicolon,
        sequence: parse_sequence,
        simpleBracketDelimitedCharacter: parse_simpleBracketDelimitedCharacter,
        simpleDoubleQuotedCharacter: parse_simpleDoubleQuotedCharacter,
        simpleEscapeSequence: parse_simpleEscapeSequence,
        simpleSingleQuotedCharacter: parse_simpleSingleQuotedCharacter,
        singleLineComment: parse_singleLineComment,
        singleQuotedCharacter: parse_singleQuotedCharacter,
        singleQuotedLiteral: parse_singleQuotedLiteral,
        slash: parse_slash,
        star: parse_star,
        suffixed: parse_suffixed,
        unicodeEscapeSequence: parse_unicodeEscapeSequence,
        upperCaseLetter: parse_upperCaseLetter,
        whitespace: parse_whitespace,
        zeroEscapeSequence: parse_zeroEscapeSequence
      };
      if (startRule !== undefined) {
        if (parseFunctions[startRule] === undefined) {
          throw new Error("Invalid rule name: " + quote(startRule) + ".");
        }
      } else {
        startRule = "grammar";
      }
      var pos = 0;
      var reportMatchFailures = true;
      var rightmostMatchFailuresPos = 0;
@ -3516,7 +3582,7 @@ PEG.parser = (function(){
-      var result = parse_grammar();
+      var result = parseFunctions[startRule]();
      /*
       * The parser is now in one of the following three states:
--- a/test/compiler-test.js
+++ b/test/compiler-test.js
@ -395,6 +395,28 @@ test("error positions", function() {
  doesNotParseWithPos(parser, "1\u2029x", 2, 1); // paragraph separator
 });
 test("start rule", function() {
  /* Two rules that both accept any input but return distinguishable values. */
  var grammar = [
    'a = .* { return "alpha"; }',
    'b = .* { return "beta"; }'
  ].join("\n");
  var parser = PEG.buildParser(grammar);

  /* Without an explicit start rule, the first rule of the grammar is used. */
  parses(parser, "whatever", "alpha");

  /* An explicitly named rule becomes the entry point. */
  parsesWithStartRule(parser, "whatever", "a", "alpha");
  parsesWithStartRule(parser, "whatever", "b", "beta");

  /* A name that matches no rule must be rejected with a plain Error. */
  var parseWithUnknownRule = function() {
    parser.parse("whatever", "c");
  };
  var isInvalidRuleNameError = function(e) {
    return e instanceof Error && e.message === "Invalid rule name: \"c\".";
  };
  raises(parseWithUnknownRule, isInvalidRuleNameError);
 });
 /*
 * Following examples are from Wikipedia, see
 * http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938.
--- a/test/helpers.js
+++ b/test/helpers.js
@ -2,6 +2,10 @@ parses = function(parser, input, expected) {
  deepEqual(parser.parse(input), expected);
 };
 /*
  * Asserts that |parser| parses |input| successfully when started from the
  * rule named |startRule|, and that the result deep-equals |expected|.
  */
 parsesWithStartRule = function(parser, input, startRule, expected) {
  var actual = parser.parse(input, startRule);

  deepEqual(actual, expected);
 };
 doesNotParse = function(parser, input) {
  raises(function() { parser.parse(input); }, parser.SyntaxError);
 };