Add ability to start parsing from any grammar rule

Calling the parsing function could have been done without the ugly table
by using |eval|, but this seemed to degrade performance significantly (by
about 3 %). This is probably because engines optimize badly in the
presence of |eval|.

The method used in this patch does not change the benchmark suite
execution speed on V8 in a statistically significant way.

Detailed results (benchmark suite totals):

---------------------------------
 Test #     Before       After
---------------------------------
      1   38.24 kB/s   38.28 kB/s
      2   38.35 kB/s   38.15 kB/s
      3   38.43 kB/s   38.40 kB/s
      4   38.53 kB/s   38.20 kB/s
      5   38.25 kB/s   38.39 kB/s
---------------------------------
Average   38.36 kB/s   38.28 kB/s
---------------------------------

Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.151 Safari/534.16
This commit is contained in:
David Majda 2011-03-29 15:40:34 +02:00
parent 1e57bf778d
commit cc7ad9739f
4 changed files with 115 additions and 4 deletions

View file

@ -96,6 +96,12 @@ PEG.compiler.emitter = function(ast) {
? emit(node.initializer) ? emit(node.initializer)
: ""; : "";
var parseFunctionTableItems = [];
for (var name in node.rules) {
parseFunctionTableItems.push(name + ": parse_" + name);
}
parseFunctionTableItems.sort();
var parseFunctionDefinitions = []; var parseFunctionDefinitions = [];
for (var name in node.rules) { for (var name in node.rules) {
parseFunctionDefinitions.push(emit(node.rules[name])); parseFunctionDefinitions.push(emit(node.rules[name]));
@ -112,7 +118,19 @@ PEG.compiler.emitter = function(ast) {
" * which the parser was generated (see |PEG.buildParser|). If the parsing is", " * which the parser was generated (see |PEG.buildParser|). If the parsing is",
" * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.", " * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.",
" */", " */",
" parse: function(input) {", " parse: function(input, startRule) {",
" var parseFunctions = {",
" ${parseFunctionTableItems}",
" };",
" ",
" if (startRule !== undefined) {",
" if (parseFunctions[startRule] === undefined) {",
" throw new Error(\"Invalid rule name: \" + quote(startRule) + \".\");",
" }",
" } else {",
" startRule = ${startRule|string};",
" }",
" ",
" var pos = 0;", " var pos = 0;",
" var reportMatchFailures = true;", " var reportMatchFailures = true;",
" var rightmostMatchFailuresPos = 0;", " var rightmostMatchFailuresPos = 0;",
@ -245,7 +263,7 @@ PEG.compiler.emitter = function(ast) {
" ", " ",
" ${initializerCode}", " ${initializerCode}",
" ", " ",
" var result = parse_${startRule}();", " var result = parseFunctions[startRule]();",
" ", " ",
" /*", " /*",
" * The parser is now in one of the following three states:", " * The parser is now in one of the following three states:",
@ -302,6 +320,7 @@ PEG.compiler.emitter = function(ast) {
"})()", "})()",
{ {
initializerCode: initializerCode, initializerCode: initializerCode,
parseFunctionTableItems: parseFunctionTableItems.join(",\n"),
parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"), parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
startRule: node.startRule startRule: node.startRule
} }

View file

@ -8,7 +8,73 @@ PEG.parser = (function(){
* which the parser was generated (see |PEG.buildParser|). If the parsing is * which the parser was generated (see |PEG.buildParser|). If the parsing is
* unsuccessful, throws |PEG.parser.SyntaxError| describing the error. * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.
*/ */
parse: function(input) { parse: function(input, startRule) {
var parseFunctions = {
__: parse___,
action: parse_action,
and: parse_and,
braced: parse_braced,
bracketDelimitedCharacter: parse_bracketDelimitedCharacter,
choice: parse_choice,
class: parse_class,
classCharacter: parse_classCharacter,
classCharacterRange: parse_classCharacterRange,
colon: parse_colon,
comment: parse_comment,
digit: parse_digit,
dot: parse_dot,
doubleQuotedCharacter: parse_doubleQuotedCharacter,
doubleQuotedLiteral: parse_doubleQuotedLiteral,
eol: parse_eol,
eolChar: parse_eolChar,
eolEscapeSequence: parse_eolEscapeSequence,
equals: parse_equals,
grammar: parse_grammar,
hexDigit: parse_hexDigit,
hexEscapeSequence: parse_hexEscapeSequence,
identifier: parse_identifier,
initializer: parse_initializer,
labeled: parse_labeled,
letter: parse_letter,
literal: parse_literal,
lowerCaseLetter: parse_lowerCaseLetter,
lparen: parse_lparen,
multiLineComment: parse_multiLineComment,
nonBraceCharacter: parse_nonBraceCharacter,
nonBraceCharacters: parse_nonBraceCharacters,
not: parse_not,
plus: parse_plus,
prefixed: parse_prefixed,
primary: parse_primary,
question: parse_question,
rparen: parse_rparen,
rule: parse_rule,
semicolon: parse_semicolon,
sequence: parse_sequence,
simpleBracketDelimitedCharacter: parse_simpleBracketDelimitedCharacter,
simpleDoubleQuotedCharacter: parse_simpleDoubleQuotedCharacter,
simpleEscapeSequence: parse_simpleEscapeSequence,
simpleSingleQuotedCharacter: parse_simpleSingleQuotedCharacter,
singleLineComment: parse_singleLineComment,
singleQuotedCharacter: parse_singleQuotedCharacter,
singleQuotedLiteral: parse_singleQuotedLiteral,
slash: parse_slash,
star: parse_star,
suffixed: parse_suffixed,
unicodeEscapeSequence: parse_unicodeEscapeSequence,
upperCaseLetter: parse_upperCaseLetter,
whitespace: parse_whitespace,
zeroEscapeSequence: parse_zeroEscapeSequence
};
if (startRule !== undefined) {
if (parseFunctions[startRule] === undefined) {
throw new Error("Invalid rule name: " + quote(startRule) + ".");
}
} else {
startRule = "grammar";
}
var pos = 0; var pos = 0;
var reportMatchFailures = true; var reportMatchFailures = true;
var rightmostMatchFailuresPos = 0; var rightmostMatchFailuresPos = 0;
@ -3516,7 +3582,7 @@ PEG.parser = (function(){
var result = parse_grammar(); var result = parseFunctions[startRule]();
/* /*
* The parser is now in one of the following three states: * The parser is now in one of the following three states:

View file

@ -395,6 +395,28 @@ test("error positions", function() {
doesNotParseWithPos(parser, "1\u2029x", 2, 1); // paragraph separator doesNotParseWithPos(parser, "1\u2029x", 2, 1); // paragraph separator
}); });
test("start rule", function() {
  /* Two rules that match any input but return distinguishable values. */
  var grammar = [
    'a = .* { return "alpha"; }',
    'b = .* { return "beta"; }'
  ].join("\n");
  var parser = PEG.buildParser(grammar);

  /* Recognizes the error expected for a rule name missing from the grammar. */
  function isInvalidRuleNameError(e) {
    return e instanceof Error && e.message === "Invalid rule name: \"c\".";
  }

  /* Without an explicit start rule, the first rule of the grammar is used. */
  parses(parser, "whatever", "alpha");

  /* Either rule can be selected explicitly by name. */
  parsesWithStartRule(parser, "whatever", "a", "alpha");
  parsesWithStartRule(parser, "whatever", "b", "beta");

  /* An unknown rule name must be rejected. */
  raises(
    function() { parser.parse("whatever", "c") },
    isInvalidRuleNameError
  );
});
/* /*
* Following examples are from Wikipedia, see * Following examples are from Wikipedia, see
* http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938. * http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938.

View file

@ -2,6 +2,10 @@ parses = function(parser, input, expected) {
deepEqual(parser.parse(input), expected); deepEqual(parser.parse(input), expected);
}; };
/*
 * Asserts that |parser|, when started from the rule named |startRule|, parses
 * |input| into a value deeply equal to |expected|.
 */
parsesWithStartRule = function(parser, input, startRule, expected) {
  var actual = parser.parse(input, startRule);
  deepEqual(actual, expected);
};
doesNotParse = function(parser, input) { doesNotParse = function(parser, input) {
raises(function() { parser.parse(input); }, parser.SyntaxError); raises(function() { parser.parse(input); }, parser.SyntaxError);
}; };