Browse Source

Allowed start rules must be specified explicitly

Before this commit, generated parsers were able to start parsing from any
rule. This was nice, but it made rule code inlining impossible.

Since this commit, the list of allowed start rules has to be specified
explicitly using the |allowedStartRules| option of the |PEG.buildParser|
method (or the --allowed-start-rules option on the command-line). These
rules will be excluded from inlining when it's implemented.
redux
David Majda 9 years ago
parent
commit
208cc33930
  1. 4
      README.md
  2. 27
      bin/pegjs
  3. 53
      spec/generated-parser.spec.js
  4. 11
      src/compiler/passes/generate-code.js
  5. 59
      src/parser.js

4
README.md

@ -55,10 +55,11 @@ If you omit both input and ouptut file, standard input and output are used.
By default, the parser object is assigned to `module.exports`, which makes the output a Node.js module. You can assign it to another variable by passing a variable name using the `-e`/`--export-var` option. This may be helpful if you want to use the parser in browser environment.
You can tweak the generated parser with two options:
You can tweak the generated parser with several options:
* `--cache` — makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower
* `--track-line-and-column` — makes the parser track line and column (available as `line` and `column` variables in the actions and predicates)
* `--allowed-start-rules` — comma-separated list of rules the parser will be allowed to start parsing from (default: the first rule in the grammar)
### JavaScript API
@ -80,6 +81,7 @@ You can tweak the generated parser by passing a second parameter with an options
* `cache` — if `true`, makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower (default: `false`)
* `trackLineAndColumn` — if `true`, makes the parser track line and column (available as `line` and `column` variables in the actions and predicates) (default: `false`)
* `allowedStartRules` — rules the parser will be allowed to start parsing from (default: the first rule in the grammar)
Using the Parser
----------------

27
bin/pegjs

@ -21,12 +21,17 @@ function printHelp() {
util.puts("omitted, standard input and output are used.");
util.puts("");
util.puts("Options:");
util.puts(" -e, --export-var <variable> name of the variable where the parser object");
util.puts(" will be stored (default: \"module.exports\")");
util.puts(" --cache make generated parser cache results");
util.puts(" --track-line-and-column make generated parser track line and column");
util.puts(" -v, --version print version information and exit");
util.puts(" -h, --help print help and exit");
util.puts(" -e, --export-var <variable> name of the variable where the parser");
util.puts(" object will be stored (default:");
util.puts(" \"module.exports\")");
util.puts(" --cache make generated parser cache results");
util.puts(" --track-line-and-column make generated parser track line and column");
util.puts(" --allowed-start-rules <rules> comma-separated list of rules the generated");
util.puts(" parser will be allowed to start parsing");
util.puts(" from (default: the first rule in the");
util.puts(" grammar)");
util.puts(" -v, --version print version information and exit");
util.puts(" -h, --help print help and exit");
}
function exitSuccess() {
@ -90,6 +95,16 @@ while (args.length > 0 && isOption(args[0])) {
options.trackLineAndColumn = true;
break;
case "--allowed-start-rules":
nextArg();
if (args.length === 0) {
abort("Missing parameter of the -e/--allowed-start-rules option.");
}
options.allowedStartRules = args[0]
.split(",")
.map(function(s) { return s.trim() });
break;
case "-v":
case "--version":
printVersion();

53
spec/generated-parser.spec.js

@ -135,27 +135,28 @@ describe("generated parser", function() {
describe("parse", function() {
var parser = PEG.buildParser([
'a = "x" { return "a"; }',
'b = "x" { return "b"; }'
].join("\n"));
'b = "x" { return "b"; }',
'c = "x" { return "c"; }'
].join("\n"), { allowedStartRules: ["b", "c"] });
describe("start rule", function() {
describe("without the |startRule| option", function() {
it("uses the first rule", function() {
expect(parser).toParse("x", "a");
it("uses the first allowed rule", function() {
expect(parser).toParse("x", "b");
});
});
describe("when the |startRule| option specifies existing rule", function() {
describe("when the |startRule| option specifies allowed rule", function() {
it("uses the specified rule", function() {
expect(parser).toParse("x", { startRule: "a" }, "a");
expect(parser).toParse("x", { startRule: "b" }, "b");
expect(parser).toParse("x", { startRule: "c" }, "c");
});
});
describe("when the |startRule| option specifies non-existent rule", function() {
describe("when the |startRule| option specifies disallowed rule", function() {
it("throws exception", function() {
expect(parser).toFailToParse("x", { startRule: "c" }, {
message: "Invalid rule name: \"c\"."
expect(parser).toFailToParse("x", { startRule: "a" }, {
message: "Can't start parsing from rule \"a\"."
});
});
});
@ -899,6 +900,40 @@ describe("generated parser", function() {
});
});
// Checks which rules a generated parser accepts through the |startRule|
// parse option, depending on the |allowedStartRules| build option.
describe("allowed start rules", function() {
// Three rules that all match the same input, so the start rule alone decides
// whether parsing is permitted.
var grammar = [
'a = "x"',
'b = "x"',
'c = "x"'
].join("\n");
// With no options passed to PEG.buildParser, only the first rule ("a") is
// expected to be usable as a start rule.
describe("without the |allowedStartRules| option", function() {
var parser = PEG.buildParser(grammar);
it("allows the first rule", function() {
expect(parser).toParse("x", { startRule: "a" }, "x");
});
it("does not allow any other rules", function() {
// NOTE(review): the empty details object presumably means only the failure
// itself is asserted, not a specific message — confirm against the matcher.
expect(parser).toFailToParse("x", { startRule: "b" }, { });
expect(parser).toFailToParse("x", { startRule: "c" }, { });
});
});
// With an explicit list, exactly the listed rules ("b" and "c") are expected
// to be usable; the first rule ("a") is not, because it is not listed.
describe("with the |allowedStartRules| option", function() {
var parser = PEG.buildParser(grammar, { allowedStartRules: ["b", "c"] });
it("allows the specified rules", function() {
expect(parser).toParse("x", { startRule: "b" }, "x");
expect(parser).toParse("x", { startRule: "c" }, "x");
});
it("does not allow any other rules", function() {
expect(parser).toFailToParse("x", { startRule: "a" }, { });
});
});
});
/*
* Following examples are from Wikipedia, see
* http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938.

11
src/compiler/passes/generate-code.js

@ -3,7 +3,8 @@ PEG.compiler.passes.generateCode = function(ast, options) {
options = clone(options) || {};
defaults(options, {
cache: false,
trackLineAndColumn: false
trackLineAndColumn: false,
allowedStartRules: [ast.startRule]
});
/*
@ -305,8 +306,8 @@ PEG.compiler.passes.generateCode = function(ast, options) {
' */',
' parse: function(input) {',
' var parseFunctions = {',
' #for rule in node.rules',
' #{string(rule.name) + ": parse_" + rule.name + (rule !== node.rules[node.rules.length - 1] ? "," : "")}',
' #for rule in options.allowedStartRules',
' #{string(rule) + ": parse_" + rule + (rule !== options.allowedStartRules[options.allowedStartRules.length - 1] ? "," : "")}',
' #end',
' };',
' ',
@ -317,10 +318,10 @@ PEG.compiler.passes.generateCode = function(ast, options) {
' startRule = options.startRule;',
' ',
' if (parseFunctions[startRule] === undefined) {',
' throw new Error("Invalid rule name: " + quote(startRule) + ".");',
' throw new Error("Can\'t start parsing from rule " + quote(startRule) + ".");',
' }',
' } else {',
' startRule = #{string(node.startRule)};',
' startRule = #{string(options.allowedStartRules[0])};',
' }',
' ',
' #{posInit("pos")};',

59
src/parser.js

@ -37,62 +37,7 @@ PEG.parser = (function(){
*/
parse: function(input) {
var parseFunctions = {
"grammar": parse_grammar,
"initializer": parse_initializer,
"rule": parse_rule,
"choice": parse_choice,
"sequence": parse_sequence,
"labeled": parse_labeled,
"prefixed": parse_prefixed,
"suffixed": parse_suffixed,
"primary": parse_primary,
"action": parse_action,
"braced": parse_braced,
"nonBraceCharacters": parse_nonBraceCharacters,
"nonBraceCharacter": parse_nonBraceCharacter,
"equals": parse_equals,
"colon": parse_colon,
"semicolon": parse_semicolon,
"slash": parse_slash,
"and": parse_and,
"not": parse_not,
"question": parse_question,
"star": parse_star,
"plus": parse_plus,
"lparen": parse_lparen,
"rparen": parse_rparen,
"dot": parse_dot,
"identifier": parse_identifier,
"literal": parse_literal,
"string": parse_string,
"doubleQuotedString": parse_doubleQuotedString,
"doubleQuotedCharacter": parse_doubleQuotedCharacter,
"simpleDoubleQuotedCharacter": parse_simpleDoubleQuotedCharacter,
"singleQuotedString": parse_singleQuotedString,
"singleQuotedCharacter": parse_singleQuotedCharacter,
"simpleSingleQuotedCharacter": parse_simpleSingleQuotedCharacter,
"class": parse_class,
"classCharacterRange": parse_classCharacterRange,
"classCharacter": parse_classCharacter,
"bracketDelimitedCharacter": parse_bracketDelimitedCharacter,
"simpleBracketDelimitedCharacter": parse_simpleBracketDelimitedCharacter,
"simpleEscapeSequence": parse_simpleEscapeSequence,
"zeroEscapeSequence": parse_zeroEscapeSequence,
"hexEscapeSequence": parse_hexEscapeSequence,
"unicodeEscapeSequence": parse_unicodeEscapeSequence,
"eolEscapeSequence": parse_eolEscapeSequence,
"digit": parse_digit,
"hexDigit": parse_hexDigit,
"letter": parse_letter,
"lowerCaseLetter": parse_lowerCaseLetter,
"upperCaseLetter": parse_upperCaseLetter,
"__": parse___,
"comment": parse_comment,
"singleLineComment": parse_singleLineComment,
"multiLineComment": parse_multiLineComment,
"eol": parse_eol,
"eolChar": parse_eolChar,
"whitespace": parse_whitespace
"grammar": parse_grammar
};
var options = arguments.length > 1 ? arguments[1] : {},
@ -102,7 +47,7 @@ PEG.parser = (function(){
startRule = options.startRule;
if (parseFunctions[startRule] === undefined) {
throw new Error("Invalid rule name: " + quote(startRule) + ".");
throw new Error("Can't start parsing from rule " + quote(startRule) + ".");
}
} else {
startRule = "grammar";

Loading…
Cancel
Save