From 98ff2eb83fb3a09655880605f248d49387d1340e Mon Sep 17 00:00:00 2001 From: David Majda Date: Wed, 19 Sep 2012 08:32:21 +0200 Subject: [PATCH] Allow passing options to the parser This commit replaces the |startRule| parameter of the |parse| method in generated parsers with a more generic |options| parameter -- an options object. This options object can be used to pass custom options to the parser because it is visible as the |options| variable inside parser code. The start rule can now be specified as the |startRule| option. This means you have to replace all calls like: parser.parse("input", "myStartRule"); with parser.parse("input", { startRule: "myStartRule" }); Closes GH-37. --- README.md | 14 +++- spec/generated-parser.spec.js | 97 ++++++++++++++++++++++------ src/compiler/passes/generate-code.js | 9 ++- src/parser.js | 9 ++- 4 files changed, 104 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index a225d11..95aca17 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,11 @@ Using the generated parser is simple — just call its `parse` method and pass a parser.parse("abcd"); // throws an exception -You can also start parsing from a specific rule in the grammar. Just pass the rule name to the `parse` method as a second parameter. +You can tweak parser behavior by passing a second parameter with an options object to the `parse` method. Only one option is currently supported: + + * `startRule` — name of the rule to start parsing from + +Parsers can also support their own custom options. Grammar Syntax and Semantics ---------------------------- @@ -121,7 +125,7 @@ On the top level, the grammar consists of *rules* (in our example, there are fiv A rule name must be a JavaScript identifier. It is followed by an equality sign (“=”) and a parsing expression. If the rule has a human-readable name, it is written as a JavaScript string between the name and separating equality sign.
Rules need to be separated only by whitespace (their beginning is easily recognizable), but a semicolon (“;”) after the parsing expression is allowed. -Rules can be preceded by an *initializer* — a piece of JavaScript code in curly braces (“{” and “}”). This code is executed before the generated parser starts parsing. All variables and functions defined in the initializer are accessible in rule actions and semantic predicates. Curly braces in the initializer code must be balanced. +Rules can be preceded by an *initializer* — a piece of JavaScript code in curly braces (“{” and “}”). This code is executed before the generated parser starts parsing. All variables and functions defined in the initializer are accessible in rule actions and semantic predicates. The code inside the initializer can access options passed to the parser using the `options` variable. Curly braces in the initializer code must be balanced. The parsing expressions of the rules are used to match the input text to the grammar. There are various types of expressions — matching characters or character classes, indicating optional parts and repetition, etc. Expressions can also contain references to other rules. See detailed description below. @@ -188,6 +192,8 @@ The code inside the predicate can access all variables and functions defined in The code inside the predicate can also access the current parse position using the `offset` variable. It is a zero-based character index into the input string. If the `trackLineAndColumn` option was set to `true` when the parser was generated (or `--track-line-and-column` was used on the command line), the code can also access the current line and column using the `line` and `column` variables. Both are one-based indexes. +The code inside the predicate can also access options passed to the parser using the `options` variable. + Note that curly braces in the predicate code must be balanced. #### ! 
{ *predicate* } @@ -198,6 +204,8 @@ The code inside the predicate can access all variables and functions defined in The code inside the predicate can also access the current parse position using the `offset` variable. It is a zero-based character index into the input string. If the `trackLineAndColumn` option was set to `true` when the parser was generated (or `--track-line-and-column` was used on the command line), the code can also access the current line and column using the `line` and `column` variables. Both are one-based indexes. +The code inside the predicate can also access options passed to the parser using the `options` variable. + Note that curly braces in the predicate code must be balanced. #### *label* : *expression* @@ -220,6 +228,8 @@ The code inside the action can access all variables and functions defined in the The code inside the action can also access the parse position at the beginning of the action's expression using the `offset` variable. It is a zero-based character index into the input string. If the `trackLineAndColumn` option was set to `true` when the parser was generated (or `--track-line-and-column` was used on the command line), the code can also access the line and column at the beginning of the action's expression using the `line` and `column` variables. Both are one-based indexes. +The code inside the action can also access options passed to the parser using the `options` variable. + Note that curly braces in the action code must be balanced. #### *expression1* / *expression2* / ... / *expressionn* diff --git a/spec/generated-parser.spec.js b/spec/generated-parser.spec.js index a915ec3..d23f985 100644 --- a/spec/generated-parser.spec.js +++ b/spec/generated-parser.spec.js @@ -40,14 +40,17 @@ describe("generated parser", function() { beforeEach(function() { this.addMatchers({ - toParse: function(input, expected) { - var result; + toParse: function(input) { + var options = arguments.length > 2 ? 
arguments[1] : {}, + expected = arguments[arguments.length - 1], + result; try { - result = this.actual.parse(input); + result = this.actual.parse(input, options); this.message = function() { return "Expected " + jasmine.pp(input) + " " + "with options " + jasmine.pp(options) + " " + (this.isNot ? "not " : "") + "to parse as " + jasmine.pp(expected) + ", " + "but it parsed as " + jasmine.pp(result) + "."; @@ -57,6 +60,7 @@ describe("generated parser", function() { } catch (e) { this.message = function() { return "Expected " + jasmine.pp(input) + " " + "with options " + jasmine.pp(options) + " " + "to parse as " + jasmine.pp(expected) + ", " + "but it failed to parse with message " + jasmine.pp(e.message) + "."; @@ -66,14 +70,21 @@ describe("generated parser", function() { } }, - toFailToParse: function(input, details) { + toFailToParse: function(input) { + var options = arguments.length > 2 ? arguments[1] : {}, + details = arguments.length > 1 + ? arguments[arguments.length - 1] + : undefined; + /* * Extracted into a function just to silence JSHint complaining about * creating functions in a loop. */ function buildKeyMessage(key, value) { return function() { - return "Expected " + jasmine.pp(input) + " to fail to parse" + return "Expected " + jasmine.pp(input) + " " + + "with options " + jasmine.pp(options) + " " + + "to fail to parse" + (details ? " with details " + jasmine.pp(details) : "") + ", " + "but " + jasmine.pp(key) + " " + "is " + jasmine.pp(value) + "."; @@ -83,10 +94,12 @@ describe("generated parser", function() { var result, key; try { - result = this.actual.parse(input); + result = this.actual.parse(input, options); this.message = function() { - return "Expected " + jasmine.pp(input) + " to fail to parse" + return "Expected " + jasmine.pp(input) + " " + + "with options " + jasmine.pp(options) + " " + + "to fail to parse" + (details ?
" with details " + jasmine.pp(details) : "") + ", " + "but it parsed as " + jasmine.pp(result) + "."; }; @@ -95,7 +108,9 @@ describe("generated parser", function() { } catch (e) { if (this.isNot) { this.message = function() { - return "Expected " + jasmine.pp(input) + " to parse, " + return "Expected " + jasmine.pp(input) + " " + "with options " + jasmine.pp(options) + " " + "to parse, " + "but it failed with message " + jasmine.pp(e.message) + "."; }; @@ -123,19 +138,27 @@ describe("generated parser", function() { 'b = "x" { return "b"; }' ].join("\n")); - it("uses the fist rule as a start rule when no |startRule| is specified", function() { - expect(parser.parse("x")).toBe("a"); - }); + describe("start rule", function() { + describe("without the |startRule| option", function() { + it("uses the first rule", function() { + expect(parser).toParse("x", "a"); + }); + }); - it("uses the specified rule as a start rule when |startRule| is specified", function() { - expect(parser.parse("x", "a")).toBe("a"); - expect(parser.parse("x", "b")).toBe("b"); - }); + describe("when the |startRule| option specifies existing rule", function() { + it("uses the specified rule", function() { + expect(parser).toParse("x", { startRule: "a" }, "a"); + expect(parser).toParse("x", { startRule: "b" }, "b"); + }); + }); - it("throws exception when the specified start rule does not exist", function() { - expect(function() { - parser.parse("x", "c"); - }).toThrow("Invalid rule name: \"c\"."); + describe("when the |startRule| option specifies non-existent rule", function() { + it("throws exception", function() { + expect(parser).toFailToParse("x", { startRule: "c" }, { + message: "Invalid rule name: \"c\"."
+ }); + }); + }); }); }); @@ -149,6 +172,15 @@ describe("generated parser", function() { expect(parser).toParse("a", 42); }); + + it("can use options passed to the parser", function() { + var parser = PEG.buildParser([ + '{ var result = options; }', + 'start = "a" { return result; }' + ].join("\n"), options); + + expect(parser).toParse("a", { a: 42 }, { a: 42 }); + }); }); describe("rule matching", function() { @@ -277,6 +309,15 @@ describe("generated parser", function() { expect(parser).toParse("a", 42); }); + it("can use options passed to the parser", function() { + var parser = PEG.buildParser( + 'start = "a" { return options; }', + options + ); + + expect(parser).toParse("a", { a: 42 }, { a: 42 }); + }); + it("does not advance position when the expression matches but the action returns |null|", function() { var parser = PEG.buildParser( 'start = "a" { return null; } / "a"', @@ -431,6 +472,15 @@ describe("generated parser", function() { expect(parser).toParse("a", ["a", ""]); }); + + it("can use options passed to the parser", function() { + var parser = PEG.buildParser([ + '{ var result; }', + 'start = "a" &{ result = options; return true; } { return result; }' + ].join("\n"), options); + + expect(parser).toParse("a", { a: 42 }, { a: 42 }); + }); }); describe("semantic not code", function() { @@ -506,6 +556,15 @@ describe("generated parser", function() { expect(parser).toParse("a", ["a", ""]); }); + + it("can use options passed to the parser", function() { + var parser = PEG.buildParser([ + '{ var result; }', + 'start = "a" !{ result = options; return false; } { return result; }' + ].join("\n"), options); + + expect(parser).toParse("a", { a: 42 }, { a: 42 }); + }); }); describe("optional matching", function() { diff --git a/src/compiler/passes/generate-code.js b/src/compiler/passes/generate-code.js index da506d1..3fab741 100644 --- a/src/compiler/passes/generate-code.js +++ b/src/compiler/passes/generate-code.js @@ -305,14 +305,19 @@ 
PEG.compiler.passes.generateCode = function(ast, options) { ' * which the parser was generated (see |PEG.buildParser|). If the parsing is', ' * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.', ' */', - ' parse: function(input, startRule) {', + ' parse: function(input) {', ' var parseFunctions = {', ' #for rule in node.rules', ' #{string(rule.name) + ": parse_" + rule.name + (rule !== node.rules[node.rules.length - 1] ? "," : "")}', ' #end', ' };', ' ', - ' if (startRule !== undefined) {', + ' var options = arguments.length > 1 ? arguments[1] : {},', + ' startRule;', + ' ', + ' if (options.startRule !== undefined) {', + ' startRule = options.startRule;', + ' ', ' if (parseFunctions[startRule] === undefined) {', ' throw new Error("Invalid rule name: " + quote(startRule) + ".");', ' }', diff --git a/src/parser.js b/src/parser.js index 52b15dd..b8ea8a9 100644 --- a/src/parser.js +++ b/src/parser.js @@ -35,7 +35,7 @@ PEG.parser = (function(){ * which the parser was generated (see |PEG.buildParser|). If the parsing is * unsuccessful, throws |PEG.parser.SyntaxError| describing the error. */ - parse: function(input, startRule) { + parse: function(input) { var parseFunctions = { "grammar": parse_grammar, "initializer": parse_initializer, @@ -95,7 +95,12 @@ PEG.parser = (function(){ "whitespace": parse_whitespace }; - if (startRule !== undefined) { + var options = arguments.length > 1 ? arguments[1] : {}, + startRule; + + if (options.startRule !== undefined) { + startRule = options.startRule; + if (parseFunctions[startRule] === undefined) { throw new Error("Invalid rule name: " + quote(startRule) + "."); }