From 208cc33930c267246a0287fa3127b8fb6d5e6502 Mon Sep 17 00:00:00 2001 From: David Majda Date: Sat, 20 Oct 2012 15:58:08 +0200 Subject: [PATCH] Allowed start rules must be specified explicitly Before this commit, generated parsers were able to start parsing from any rule. This was nice, but it made rule code inlining impossible. Since this commit, the list of allowed start rules has to be specified explicitly using the |allowedStartRules| option of the |PEG.buildParser| method (or the --allowed-start-rules option on the command-line). These rules will be excluded from inlining when it's implemented. --- README.md | 4 +- bin/pegjs | 27 ++++++++++--- spec/generated-parser.spec.js | 53 ++++++++++++++++++++----- src/compiler/passes/generate-code.js | 11 +++--- src/parser.js | 59 +--------------------------- 5 files changed, 76 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index d7dd5fa..4532c8b 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,11 @@ If you omit both input and ouptut file, standard input and output are used. By default, the parser object is assigned to `module.exports`, which makes the output a Node.js module. You can assign it to another variable by passing a variable name using the `-e`/`--export-var` option. This may be helpful if you want to use the parser in browser environment. 
-You can tweak the generated parser with two options: +You can tweak the generated parser with several options: * `--cache` — makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower * `--track-line-and-column` — makes the parser track line and column (available as `line` and `column` variables in the actions and predicates) + * `--allowed-start-rules` — comma-separated list of rules the parser will be allowed to start parsing from (default: the first rule in the grammar) ### JavaScript API @@ -80,6 +81,7 @@ You can tweak the generated parser by passing a second parameter with an options * `cache` — if `true`, makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower (default: `false`) * `trackLineAndColumn` — if `true`, makes the parser track line and column (available as `line` and `column` variables in the actions and predicates) (default: `false`) + * `allowedStartRules` — rules the parser will be allowed to start parsing from (default: the first rule in the grammar) Using the Parser ---------------- diff --git a/bin/pegjs b/bin/pegjs index 23d74db..0466617 100755 --- a/bin/pegjs +++ b/bin/pegjs @@ -21,12 +21,17 @@ function printHelp() { util.puts("omitted, standard input and output are used."); util.puts(""); util.puts("Options:"); - util.puts(" -e, --export-var name of the variable where the parser object"); - util.puts(" will be stored (default: \"module.exports\")"); - util.puts(" --cache make generated parser cache results"); - util.puts(" --track-line-and-column make generated parser track line and column"); - util.puts(" -v, --version print version information and exit"); - util.puts(" -h, --help print help and exit"); + util.puts(" -e, --export-var name of the variable where the parser"); + util.puts(" object will be stored (default:"); + util.puts(" \"module.exports\")"); + util.puts(" --cache make generated parser cache results"); + 
util.puts(" --track-line-and-column make generated parser track line and column"); + util.puts(" --allowed-start-rules comma-separated list of rules the generated"); + util.puts(" parser will be allowed to start parsing"); + util.puts(" from (default: the first rule in the"); + util.puts(" grammar)"); + util.puts(" -v, --version print version information and exit"); + util.puts(" -h, --help print help and exit"); } function exitSuccess() { @@ -90,6 +95,16 @@ while (args.length > 0 && isOption(args[0])) { options.trackLineAndColumn = true; break; + case "--allowed-start-rules": + nextArg(); + if (args.length === 0) { + abort("Missing parameter of the -e/--allowed-start-rules option."); + } + options.allowedStartRules = args[0] + .split(",") + .map(function(s) { return s.trim() }); + break; + case "-v": case "--version": printVersion(); diff --git a/spec/generated-parser.spec.js b/spec/generated-parser.spec.js index 2c6fd94..846a9ea 100644 --- a/spec/generated-parser.spec.js +++ b/spec/generated-parser.spec.js @@ -135,27 +135,28 @@ describe("generated parser", function() { describe("parse", function() { var parser = PEG.buildParser([ 'a = "x" { return "a"; }', - 'b = "x" { return "b"; }' - ].join("\n")); + 'b = "x" { return "b"; }', + 'c = "x" { return "c"; }' + ].join("\n"), { allowedStartRules: ["b", "c"] }); describe("start rule", function() { describe("without the |startRule| option", function() { - it("uses the first rule", function() { - expect(parser).toParse("x", "a"); + it("uses the first allowed rule", function() { + expect(parser).toParse("x", "b"); }); }); - describe("when the |startRule| option specifies existing rule", function() { + describe("when the |startRule| option specifies allowed rule", function() { it("uses the specified rule", function() { - expect(parser).toParse("x", { startRule: "a" }, "a"); expect(parser).toParse("x", { startRule: "b" }, "b"); + expect(parser).toParse("x", { startRule: "c" }, "c"); }); }); - describe("when the |startRule| 
option specifies non-existent rule", function() { + describe("when the |startRule| option specifies disallowed rule", function() { it("throws exception", function() { - expect(parser).toFailToParse("x", { startRule: "c" }, { - message: "Invalid rule name: \"c\"." + expect(parser).toFailToParse("x", { startRule: "a" }, { + message: "Can't start parsing from rule \"a\"." }); }); }); @@ -899,6 +900,40 @@ describe("generated parser", function() { }); }); + describe("allowed start rules", function() { + var grammar = [ + 'a = "x"', + 'b = "x"', + 'c = "x"' + ].join("\n"); + + describe("without the |allowedStartRules| option", function() { + var parser = PEG.buildParser(grammar); + + it("allows the first rule", function() { + expect(parser).toParse("x", { startRule: "a" }, "x"); + }); + + it("does not allow any other rules", function() { + expect(parser).toFailToParse("x", { startRule: "b" }, { }); + expect(parser).toFailToParse("x", { startRule: "c" }, { }); + }); + }); + + describe("with the |allowedStartRules| option", function() { + var parser = PEG.buildParser(grammar, { allowedStartRules: ["b", "c"] }); + + it("allows the specified rules", function() { + expect(parser).toParse("x", { startRule: "b" }, "x"); + expect(parser).toParse("x", { startRule: "c" }, "x"); + }); + + it("does not allow any other rules", function() { + expect(parser).toFailToParse("x", { startRule: "a" }, { }); + }); + }); + }); + /* * Following examples are from Wikipedia, see * http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938. 
diff --git a/src/compiler/passes/generate-code.js b/src/compiler/passes/generate-code.js index 16c1067..80eb225 100644 --- a/src/compiler/passes/generate-code.js +++ b/src/compiler/passes/generate-code.js @@ -3,7 +3,8 @@ PEG.compiler.passes.generateCode = function(ast, options) { options = clone(options) || {}; defaults(options, { cache: false, - trackLineAndColumn: false + trackLineAndColumn: false, + allowedStartRules: [ast.startRule] }); /* @@ -305,8 +306,8 @@ PEG.compiler.passes.generateCode = function(ast, options) { ' */', ' parse: function(input) {', ' var parseFunctions = {', - ' #for rule in node.rules', - ' #{string(rule.name) + ": parse_" + rule.name + (rule !== node.rules[node.rules.length - 1] ? "," : "")}', + ' #for rule in options.allowedStartRules', + ' #{string(rule) + ": parse_" + rule + (rule !== options.allowedStartRules[options.allowedStartRules.length - 1] ? "," : "")}', ' #end', ' };', ' ', @@ -317,10 +318,10 @@ PEG.compiler.passes.generateCode = function(ast, options) { ' startRule = options.startRule;', ' ', ' if (parseFunctions[startRule] === undefined) {', - ' throw new Error("Invalid rule name: " + quote(startRule) + ".");', + ' throw new Error("Can\'t start parsing from rule " + quote(startRule) + ".");', ' }', ' } else {', - ' startRule = #{string(node.startRule)};', + ' startRule = #{string(options.allowedStartRules[0])};', ' }', ' ', ' #{posInit("pos")};', diff --git a/src/parser.js b/src/parser.js index b8ea8a9..d9f3c10 100644 --- a/src/parser.js +++ b/src/parser.js @@ -37,62 +37,7 @@ PEG.parser = (function(){ */ parse: function(input) { var parseFunctions = { - "grammar": parse_grammar, - "initializer": parse_initializer, - "rule": parse_rule, - "choice": parse_choice, - "sequence": parse_sequence, - "labeled": parse_labeled, - "prefixed": parse_prefixed, - "suffixed": parse_suffixed, - "primary": parse_primary, - "action": parse_action, - "braced": parse_braced, - "nonBraceCharacters": parse_nonBraceCharacters, - 
"nonBraceCharacter": parse_nonBraceCharacter, - "equals": parse_equals, - "colon": parse_colon, - "semicolon": parse_semicolon, - "slash": parse_slash, - "and": parse_and, - "not": parse_not, - "question": parse_question, - "star": parse_star, - "plus": parse_plus, - "lparen": parse_lparen, - "rparen": parse_rparen, - "dot": parse_dot, - "identifier": parse_identifier, - "literal": parse_literal, - "string": parse_string, - "doubleQuotedString": parse_doubleQuotedString, - "doubleQuotedCharacter": parse_doubleQuotedCharacter, - "simpleDoubleQuotedCharacter": parse_simpleDoubleQuotedCharacter, - "singleQuotedString": parse_singleQuotedString, - "singleQuotedCharacter": parse_singleQuotedCharacter, - "simpleSingleQuotedCharacter": parse_simpleSingleQuotedCharacter, - "class": parse_class, - "classCharacterRange": parse_classCharacterRange, - "classCharacter": parse_classCharacter, - "bracketDelimitedCharacter": parse_bracketDelimitedCharacter, - "simpleBracketDelimitedCharacter": parse_simpleBracketDelimitedCharacter, - "simpleEscapeSequence": parse_simpleEscapeSequence, - "zeroEscapeSequence": parse_zeroEscapeSequence, - "hexEscapeSequence": parse_hexEscapeSequence, - "unicodeEscapeSequence": parse_unicodeEscapeSequence, - "eolEscapeSequence": parse_eolEscapeSequence, - "digit": parse_digit, - "hexDigit": parse_hexDigit, - "letter": parse_letter, - "lowerCaseLetter": parse_lowerCaseLetter, - "upperCaseLetter": parse_upperCaseLetter, - "__": parse___, - "comment": parse_comment, - "singleLineComment": parse_singleLineComment, - "multiLineComment": parse_multiLineComment, - "eol": parse_eol, - "eolChar": parse_eolChar, - "whitespace": parse_whitespace + "grammar": parse_grammar }; var options = arguments.length > 1 ? 
arguments[1] : {}, @@ -102,7 +47,7 @@ PEG.parser = (function(){ startRule = options.startRule; if (parseFunctions[startRule] === undefined) { - throw new Error("Invalid rule name: " + quote(startRule) + "."); + throw new Error("Can't start parsing from rule " + quote(startRule) + "."); } } else { startRule = "grammar";