Add ability to start parsing from any grammar rule
Calling the parsing function could have been done without the ugly lookup table by using |eval|, but this seemed to degrade performance significantly (by about 3 %). This is probably because engines optimize badly in the presence of |eval|.

The method used in this patch does not change the benchmark suite execution speed on V8 in a statistically significant way.

Detailed results (benchmark suite totals):

---------------------------------
 Test #     Before       After
---------------------------------
      1   38.24 kB/s   38.28 kB/s
      2   38.35 kB/s   38.15 kB/s
      3   38.43 kB/s   38.40 kB/s
      4   38.53 kB/s   38.20 kB/s
      5   38.25 kB/s   38.39 kB/s
---------------------------------
Average   38.36 kB/s   38.39 kB/s
---------------------------------

Browser: Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.151 Safari/534.16
This commit is contained in:
parent
1e57bf778d
commit
cc7ad9739f
|
@ -96,6 +96,12 @@ PEG.compiler.emitter = function(ast) {
|
|||
? emit(node.initializer)
|
||||
: "";
|
||||
|
||||
var parseFunctionTableItems = [];
|
||||
for (var name in node.rules) {
|
||||
parseFunctionTableItems.push(name + ": parse_" + name);
|
||||
}
|
||||
parseFunctionTableItems.sort();
|
||||
|
||||
var parseFunctionDefinitions = [];
|
||||
for (var name in node.rules) {
|
||||
parseFunctionDefinitions.push(emit(node.rules[name]));
|
||||
|
@ -112,7 +118,19 @@ PEG.compiler.emitter = function(ast) {
|
|||
" * which the parser was generated (see |PEG.buildParser|). If the parsing is",
|
||||
" * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.",
|
||||
" */",
|
||||
" parse: function(input) {",
|
||||
" parse: function(input, startRule) {",
|
||||
" var parseFunctions = {",
|
||||
" ${parseFunctionTableItems}",
|
||||
" };",
|
||||
" ",
|
||||
" if (startRule !== undefined) {",
|
||||
" if (parseFunctions[startRule] === undefined) {",
|
||||
" throw new Error(\"Invalid rule name: \" + quote(startRule) + \".\");",
|
||||
" }",
|
||||
" } else {",
|
||||
" startRule = ${startRule|string};",
|
||||
" }",
|
||||
" ",
|
||||
" var pos = 0;",
|
||||
" var reportMatchFailures = true;",
|
||||
" var rightmostMatchFailuresPos = 0;",
|
||||
|
@ -245,7 +263,7 @@ PEG.compiler.emitter = function(ast) {
|
|||
" ",
|
||||
" ${initializerCode}",
|
||||
" ",
|
||||
" var result = parse_${startRule}();",
|
||||
" var result = parseFunctions[startRule]();",
|
||||
" ",
|
||||
" /*",
|
||||
" * The parser is now in one of the following three states:",
|
||||
|
@ -302,6 +320,7 @@ PEG.compiler.emitter = function(ast) {
|
|||
"})()",
|
||||
{
|
||||
initializerCode: initializerCode,
|
||||
parseFunctionTableItems: parseFunctionTableItems.join(",\n"),
|
||||
parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
|
||||
startRule: node.startRule
|
||||
}
|
||||
|
|
|
@ -8,7 +8,73 @@ PEG.parser = (function(){
|
|||
* which the parser was generated (see |PEG.buildParser|). If the parsing is
|
||||
* unsuccessful, throws |PEG.parser.SyntaxError| describing the error.
|
||||
*/
|
||||
parse: function(input) {
|
||||
parse: function(input, startRule) {
|
||||
var parseFunctions = {
|
||||
__: parse___,
|
||||
action: parse_action,
|
||||
and: parse_and,
|
||||
braced: parse_braced,
|
||||
bracketDelimitedCharacter: parse_bracketDelimitedCharacter,
|
||||
choice: parse_choice,
|
||||
class: parse_class,
|
||||
classCharacter: parse_classCharacter,
|
||||
classCharacterRange: parse_classCharacterRange,
|
||||
colon: parse_colon,
|
||||
comment: parse_comment,
|
||||
digit: parse_digit,
|
||||
dot: parse_dot,
|
||||
doubleQuotedCharacter: parse_doubleQuotedCharacter,
|
||||
doubleQuotedLiteral: parse_doubleQuotedLiteral,
|
||||
eol: parse_eol,
|
||||
eolChar: parse_eolChar,
|
||||
eolEscapeSequence: parse_eolEscapeSequence,
|
||||
equals: parse_equals,
|
||||
grammar: parse_grammar,
|
||||
hexDigit: parse_hexDigit,
|
||||
hexEscapeSequence: parse_hexEscapeSequence,
|
||||
identifier: parse_identifier,
|
||||
initializer: parse_initializer,
|
||||
labeled: parse_labeled,
|
||||
letter: parse_letter,
|
||||
literal: parse_literal,
|
||||
lowerCaseLetter: parse_lowerCaseLetter,
|
||||
lparen: parse_lparen,
|
||||
multiLineComment: parse_multiLineComment,
|
||||
nonBraceCharacter: parse_nonBraceCharacter,
|
||||
nonBraceCharacters: parse_nonBraceCharacters,
|
||||
not: parse_not,
|
||||
plus: parse_plus,
|
||||
prefixed: parse_prefixed,
|
||||
primary: parse_primary,
|
||||
question: parse_question,
|
||||
rparen: parse_rparen,
|
||||
rule: parse_rule,
|
||||
semicolon: parse_semicolon,
|
||||
sequence: parse_sequence,
|
||||
simpleBracketDelimitedCharacter: parse_simpleBracketDelimitedCharacter,
|
||||
simpleDoubleQuotedCharacter: parse_simpleDoubleQuotedCharacter,
|
||||
simpleEscapeSequence: parse_simpleEscapeSequence,
|
||||
simpleSingleQuotedCharacter: parse_simpleSingleQuotedCharacter,
|
||||
singleLineComment: parse_singleLineComment,
|
||||
singleQuotedCharacter: parse_singleQuotedCharacter,
|
||||
singleQuotedLiteral: parse_singleQuotedLiteral,
|
||||
slash: parse_slash,
|
||||
star: parse_star,
|
||||
suffixed: parse_suffixed,
|
||||
unicodeEscapeSequence: parse_unicodeEscapeSequence,
|
||||
upperCaseLetter: parse_upperCaseLetter,
|
||||
whitespace: parse_whitespace,
|
||||
zeroEscapeSequence: parse_zeroEscapeSequence
|
||||
};
|
||||
|
||||
if (startRule !== undefined) {
|
||||
if (parseFunctions[startRule] === undefined) {
|
||||
throw new Error("Invalid rule name: " + quote(startRule) + ".");
|
||||
}
|
||||
} else {
|
||||
startRule = "grammar";
|
||||
}
|
||||
|
||||
var pos = 0;
|
||||
var reportMatchFailures = true;
|
||||
var rightmostMatchFailuresPos = 0;
|
||||
|
@ -3516,7 +3582,7 @@ PEG.parser = (function(){
|
|||
|
||||
|
||||
|
||||
var result = parse_grammar();
|
||||
var result = parseFunctions[startRule]();
|
||||
|
||||
/*
|
||||
* The parser is now in one of the following three states:
|
||||
|
|
|
@ -395,6 +395,28 @@ test("error positions", function() {
|
|||
doesNotParseWithPos(parser, "1\u2029x", 2, 1); // paragraph separator
|
||||
});
|
||||
|
||||
/*
 * Checks the optional second argument of |parser.parse|, which selects the
 * rule to start parsing from. The grammar has two rules, "a" and "b"; both
 * match any input (|.*|) and differ only in the value their action returns
 * ("alpha" vs. "beta"), so the result tells which rule was used.
 */
test("start rule", function() {
|
||||
var parser = PEG.buildParser([
|
||||
'a = .* { return "alpha"; }',
|
||||
'b = .* { return "beta"; }'
|
||||
].join("\n"));
|
||||
|
||||
/* No start rule given: the first rule of the grammar ("a") is used. */
|
||||
parses(parser, "whatever", "alpha");
|
||||
|
||||
/* Start rule named explicitly: each rule must be selectable by its name. */
|
||||
parsesWithStartRule(parser, "whatever", "a", "alpha");
|
||||
parsesWithStartRule(parser, "whatever", "b", "beta");
|
||||
|
||||
/*
 * Invalid rule name: "c" is not defined in the grammar, so |parse| must throw
 * an |Error| whose message names the offending rule in quotes.
 */
|
||||
raises(
|
||||
function() { parser.parse("whatever", "c") },
|
||||
function(e) {
|
||||
return e instanceof Error && e.message === "Invalid rule name: \"c\".";
|
||||
}
|
||||
);
|
||||
});
|
||||
|
||||
/*
|
||||
* Following examples are from Wikipedia, see
|
||||
* http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938.
|
||||
|
|
|
@ -2,6 +2,10 @@ parses = function(parser, input, expected) {
|
|||
deepEqual(parser.parse(input), expected);
|
||||
};
|
||||
|
||||
/*
 * Test helper: asserts (using |deepEqual|) that parsing |input| with |parser|,
 * starting from the rule named |startRule|, produces a result deeply equal to
 * |expected|.
 */
parsesWithStartRule = function(parser, input, startRule, expected) {
|
||||
deepEqual(parser.parse(input, startRule), expected);
|
||||
};
|
||||
|
||||
/*
 * Test helper: asserts (using |raises|) that parsing |input| with |parser|
 * throws, passing |parser.SyntaxError| to |raises| as the expected error.
 */
doesNotParse = function(parser, input) {
|
||||
raises(function() { parser.parse(input); }, parser.SyntaxError);
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue