From cc7ad9739f1fd8811dbc27798022f619e3a43530 Mon Sep 17 00:00:00 2001 From: David Majda Date: Tue, 29 Mar 2011 15:40:34 +0200 Subject: [PATCH] Add ability to start parsing from any grammar rule Calling the parsing function could have been done using |eval|, without the ugly table, but this seemed to degrade performance significantly (by about 3 %). This is probably because engines optimize badly in the presence of |eval|. The method used in this patch does not change the benchmark suite execution speed on V8 in a statistically significant way. Detailed results (benchmark suite totals): --------------------------------- Test # Before After --------------------------------- 1 38.24 kB/s 38.28 kB/s 2 38.35 kB/s 38.15 kB/s 3 38.43 kB/s 38.40 kB/s 4 38.53 kB/s 38.20 kB/s 5 38.25 kB/s 38.39 kB/s --------------------------------- Average 38.36 kB/s 38.39 kB/s --------------------------------- Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.151 Safari/534.1 --- src/emitter.js | 23 ++++++++++++-- src/parser.js | 70 +++++++++++++++++++++++++++++++++++++++++-- test/compiler-test.js | 22 ++++++++++++++ test/helpers.js | 4 +++ 4 files changed, 115 insertions(+), 4 deletions(-) diff --git a/src/emitter.js b/src/emitter.js index 43543e1..64895d5 100644 --- a/src/emitter.js +++ b/src/emitter.js @@ -96,6 +96,12 @@ PEG.compiler.emitter = function(ast) { ? emit(node.initializer) : ""; + var parseFunctionTableItems = []; + for (var name in node.rules) { + parseFunctionTableItems.push(name + ": parse_" + name); + } + parseFunctionTableItems.sort(); + var parseFunctionDefinitions = []; for (var name in node.rules) { parseFunctionDefinitions.push(emit(node.rules[name])); @@ -112,7 +118,19 @@ PEG.compiler.emitter = function(ast) { " * which the parser was generated (see |PEG.buildParser|). 
If the parsing is", " * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.", " */", - " parse: function(input) {", + " parse: function(input, startRule) {", + " var parseFunctions = {", + " ${parseFunctionTableItems}", + " };", + " ", + " if (startRule !== undefined) {", + " if (parseFunctions[startRule] === undefined) {", + " throw new Error(\"Invalid rule name: \" + quote(startRule) + \".\");", + " }", + " } else {", + " startRule = ${startRule|string};", + " }", + " ", " var pos = 0;", " var reportMatchFailures = true;", " var rightmostMatchFailuresPos = 0;", @@ -245,7 +263,7 @@ PEG.compiler.emitter = function(ast) { " ", " ${initializerCode}", " ", - " var result = parse_${startRule}();", + " var result = parseFunctions[startRule]();", " ", " /*", " * The parser is now in one of the following three states:", @@ -302,6 +320,7 @@ PEG.compiler.emitter = function(ast) { "})()", { initializerCode: initializerCode, + parseFunctionTableItems: parseFunctionTableItems.join(",\n"), parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"), startRule: node.startRule } diff --git a/src/parser.js b/src/parser.js index 55f06cd..71be1dd 100644 --- a/src/parser.js +++ b/src/parser.js @@ -8,7 +8,73 @@ PEG.parser = (function(){ * which the parser was generated (see |PEG.buildParser|). If the parsing is * unsuccessful, throws |PEG.parser.SyntaxError| describing the error. 
*/ - parse: function(input) { + parse: function(input, startRule) { + var parseFunctions = { + __: parse___, + action: parse_action, + and: parse_and, + braced: parse_braced, + bracketDelimitedCharacter: parse_bracketDelimitedCharacter, + choice: parse_choice, + class: parse_class, + classCharacter: parse_classCharacter, + classCharacterRange: parse_classCharacterRange, + colon: parse_colon, + comment: parse_comment, + digit: parse_digit, + dot: parse_dot, + doubleQuotedCharacter: parse_doubleQuotedCharacter, + doubleQuotedLiteral: parse_doubleQuotedLiteral, + eol: parse_eol, + eolChar: parse_eolChar, + eolEscapeSequence: parse_eolEscapeSequence, + equals: parse_equals, + grammar: parse_grammar, + hexDigit: parse_hexDigit, + hexEscapeSequence: parse_hexEscapeSequence, + identifier: parse_identifier, + initializer: parse_initializer, + labeled: parse_labeled, + letter: parse_letter, + literal: parse_literal, + lowerCaseLetter: parse_lowerCaseLetter, + lparen: parse_lparen, + multiLineComment: parse_multiLineComment, + nonBraceCharacter: parse_nonBraceCharacter, + nonBraceCharacters: parse_nonBraceCharacters, + not: parse_not, + plus: parse_plus, + prefixed: parse_prefixed, + primary: parse_primary, + question: parse_question, + rparen: parse_rparen, + rule: parse_rule, + semicolon: parse_semicolon, + sequence: parse_sequence, + simpleBracketDelimitedCharacter: parse_simpleBracketDelimitedCharacter, + simpleDoubleQuotedCharacter: parse_simpleDoubleQuotedCharacter, + simpleEscapeSequence: parse_simpleEscapeSequence, + simpleSingleQuotedCharacter: parse_simpleSingleQuotedCharacter, + singleLineComment: parse_singleLineComment, + singleQuotedCharacter: parse_singleQuotedCharacter, + singleQuotedLiteral: parse_singleQuotedLiteral, + slash: parse_slash, + star: parse_star, + suffixed: parse_suffixed, + unicodeEscapeSequence: parse_unicodeEscapeSequence, + upperCaseLetter: parse_upperCaseLetter, + whitespace: parse_whitespace, + zeroEscapeSequence: parse_zeroEscapeSequence 
+ }; + + if (startRule !== undefined) { + if (parseFunctions[startRule] === undefined) { + throw new Error("Invalid rule name: " + quote(startRule) + "."); + } + } else { + startRule = "grammar"; + } + var pos = 0; var reportMatchFailures = true; var rightmostMatchFailuresPos = 0; @@ -3516,7 +3582,7 @@ PEG.parser = (function(){ - var result = parse_grammar(); + var result = parseFunctions[startRule](); /* * The parser is now in one of the following three states: diff --git a/test/compiler-test.js b/test/compiler-test.js index ca5a16f..d152c65 100644 --- a/test/compiler-test.js +++ b/test/compiler-test.js @@ -395,6 +395,28 @@ test("error positions", function() { doesNotParseWithPos(parser, "1\u2029x", 2, 1); // paragraph separator }); +test("start rule", function() { + var parser = PEG.buildParser([ + 'a = .* { return "alpha"; }', + 'b = .* { return "beta"; }' + ].join("\n")); + + /* Default start rule = the first one */ + parses(parser, "whatever", "alpha"); + + /* Explicit specification of the start rule */ + parsesWithStartRule(parser, "whatever", "a", "alpha"); + parsesWithStartRule(parser, "whatever", "b", "beta"); + + /* Invalid rule name */ + raises( + function() { parser.parse("whatever", "c") }, + function(e) { + return e instanceof Error && e.message === "Invalid rule name: \"c\"."; + } + ); +}); + /* * Following examples are from Wikipedia, see * http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938. diff --git a/test/helpers.js b/test/helpers.js index 180b780..512a5a8 100644 --- a/test/helpers.js +++ b/test/helpers.js @@ -2,6 +2,10 @@ parses = function(parser, input, expected) { deepEqual(parser.parse(input), expected); }; +parsesWithStartRule = function(parser, input, startRule, expected) { + deepEqual(parser.parse(input, startRule), expected); +}; + doesNotParse = function(parser, input) { raises(function() { parser.parse(input); }, parser.SyntaxError); };