From 917cf1cf2a85d3069e10a2d17caa19c2cfcac92a Mon Sep 17 00:00:00 2001 From: David Majda Date: Tue, 8 Jun 2010 11:03:28 +0200 Subject: [PATCH] Start rule of the grammar is now implicitly its first rule Before this change, the start rule was the one named "start" and there was an option to override that. This is now impossible. The goal of this change is to contain all information for the parser generation in the grammar itself. In the future, some override directive for the start rule (like Bison's "%start") may be added to the grammar. --- Rakefile | 2 +- bin/pegjs-main.js | 14 +-------- lib/compiler.js | 65 +++++++++++++++------------------------- lib/metagrammar.js | 3 +- lib/metagrammar.pegjs | 3 +- test/compiler-test.js | 38 ++++++++++------------- test/metagrammar-test.js | 15 ++++++---- 7 files changed, 55 insertions(+), 85 deletions(-) diff --git a/Rakefile b/Rakefile index ea855be..0e8705d 100644 --- a/Rakefile +++ b/Rakefile @@ -1,4 +1,4 @@ desc "Generate the grammar parser" task :metaparser do - system "bin/pegjs --start-rule grammar PEG.grammarParser lib/metagrammar.pegjs" + system "bin/pegjs PEG.grammarParser lib/metagrammar.pegjs" end diff --git a/bin/pegjs-main.js b/bin/pegjs-main.js index 8d5abc2..4bc32c7 100644 --- a/bin/pegjs-main.js +++ b/bin/pegjs-main.js @@ -61,7 +61,6 @@ function printHelp() { print("omitted, standard input and output are used."); print(""); print("Options:"); - print(" -s, --start-rule specify grammar start rule (default: \"start\")"); print(" -v, --version print version information and exit"); print(" -h, --help print help and exit"); } @@ -83,8 +82,6 @@ function abort(message) { exitFailure(); } -var startRule = "start"; - /* * The trimmed first argument is the script path -- see the beginning of this * file. @@ -93,15 +90,6 @@ var args = Array.prototype.slice.call(arguments, 1); while (args.length > 0 && isOption(args[0])) { switch (args[0]) { - case "-s": - case "--start-rule": - nextArg(); - if (args.length === 0) { - abort("Missing parameter of the -s/--start-rule option."); - } - startRule = args[0]; - break; - case "-v": case "--version": printVersion(); @@ -149,7 +137,7 @@ switch (args.length) { var input = readFile(inputFile); try { - var parser = PEG.buildParser(input, startRule); + var parser = PEG.buildParser(input); } catch (e) { if (e.line !== undefined && e.column !== undefined) { abort(e.line + ":" + e.column + ": " + e.message); diff --git a/lib/compiler.js b/lib/compiler.js index 708e678..0557086 100644 --- a/lib/compiler.js +++ b/lib/compiler.js @@ -7,24 +7,18 @@ /* no var */ PEG = {}; /* - * Generates a parser from a specified grammar and start rule and returns it. + * Generates a parser from a specified grammar and returns it. * * The grammar must be a string in the format described by the metagramar in the - * metagrammar.pegjs file. The start rule may be unspecified, in which case - * "start" is used. + * metagrammar.pegjs file. * * Throws |PEG.grammarParser.SyntaxError| if the grammar contains a syntax error * or |PEG.GrammarError| if it contains a semantic error. Note that not all * errors are detected during the generation and some may protrude to the * generated parser and cause its malfunction. */ -PEG.buildParser = function(grammar, startRule) { - startRule = startRule || "start"; - - return PEG.Compiler.compileParser( - PEG.grammarParser.parse(grammar), - startRule - ); +PEG.buildParser = function(grammar) { + return PEG.Compiler.compileParser(PEG.grammarParser.parse(grammar)); }; /* ===== PEG.GrammarError ===== */ @@ -262,14 +256,14 @@ PEG.Compiler = { /* * Checks made on the grammar AST before compilation. Each check is a function - * that is passed the AST and start rule and does not return anything. If the - * check passes, the function does not do anything special, otherwise it - * throws |PEG.GrammarError|. The checks are run in sequence in order of their + * that is passed the AST and does not return anything. If the check passes, + * the function does not do anything special, otherwise it throws + * |PEG.GrammarError|. The checks are run in sequence in order of their * definition. */ _checks: [ /* Checks that all referenced rules exist. */ - function(ast, startRule) { + function(ast) { function nop() {} function checkExpression(node) { check(node.expression); } @@ -313,17 +307,8 @@ PEG.Compiler = { } }, - /* Checks that the start rule is defined. */ - function(ast, startRule) { - if (typeof(ast.rules[startRule]) === "undefined") { - throw new PEG.GrammarError( - "Missing \"" + startRule + "\" rule." - ); - } - }, - /* Checks that no left recursion is present. */ - function(ast, startRule) { + function(ast) { function nop() {} function checkExpression(node, appliedRules) { @@ -385,15 +370,15 @@ PEG.Compiler = { /* * Optimalization passes made on the grammar AST before compilation. Each pass - * is a function that is passed the AST and start rule and returns a new AST - * and start rule. The AST can be modified in-place by the pass. The passes - * are run in sequence in order of their definition. + * is a function that is passed the AST and returns a new AST. The AST can be + * modified in-place by the pass. The passes are run in sequence in order of + * their definition. */ _passes: [ /* * Removes proxy rules -- that is, rules that only delegate to other rule. */ - function(ast, startRule) { + function(ast) { function isProxyRule(node) { return node.type === "rule" && node.expression.type === "rule_ref"; } @@ -449,14 +434,14 @@ PEG.Compiler = { for (var rule in ast.rules) { if (isProxyRule(ast.rules[rule])) { replaceRuleRefs(ast, ast.rules[rule].name, ast.rules[rule].expression.name); - if (rule === startRule) { - startRule = ast.rules[rule].expression.name; + if (rule === ast.startRule) { + ast.startRule = ast.rules[rule].expression.name; } delete ast.rules[rule]; } } - return [ast, startRule]; + return ast; } ], @@ -871,20 +856,18 @@ PEG.Compiler = { }, /* - * Generates a parser from a specified grammar AST and start rule. Throws - * |PEG.GrammarError| if the AST contains a semantic error. Note that not all - * errors are detected during the generation and some may protrude to the - * generated parser and cause its malfunction. + * Generates a parser from a specified grammar AST. Throws |PEG.GrammarError| + * if the AST contains a semantic error. Note that not all errors are detected + * during the generation and some may protrude to the generated parser and + * cause its malfunction. */ - compileParser: function(ast, startRule) { + compileParser: function(ast) { for (var i = 0; i < this._checks.length; i++) { - this._checks[i](ast, startRule); + this._checks[i](ast); } for (var i = 0; i < this._passes.length; i++) { - var newAstNadStartRule = this._passes[i](ast, startRule); - ast = newAstNadStartRule[0]; - startRule = newAstNadStartRule[1]; + ast = this._passes[i](ast); } var initializerCode = ast.initializer !== null @@ -1078,7 +1061,7 @@ PEG.Compiler = { { initializerCode: initializerCode, parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"), - startRule: startRule + startRule: ast.startRule } ); diff --git a/lib/metagrammar.js b/lib/metagrammar.js index dfc0b99..569978f 100644 --- a/lib/metagrammar.js +++ b/lib/metagrammar.js @@ -106,7 +106,8 @@ PEG.grammarParser = (function(){ return { initializer: initializer !== "" ? initializer : null, - rules: rulesConverted + rules: rulesConverted, + startRule: rules[0].name } })(result1[1], result1[2]) : null; diff --git a/lib/metagrammar.pegjs b/lib/metagrammar.pegjs index 69a4b8b..6878e4d 100644 --- a/lib/metagrammar.pegjs +++ b/lib/metagrammar.pegjs @@ -5,7 +5,8 @@ grammar return { initializer: initializer !== "" ? initializer : null, - rules: rulesConverted + rules: rulesConverted, + startRule: rules[0].name } } diff --git a/test/compiler-test.js b/test/compiler-test.js index f6efb9e..bed60d1 100644 --- a/test/compiler-test.js +++ b/test/compiler-test.js @@ -205,14 +205,6 @@ test("buildParser reports syntax errors in the grammar", function() { ); }); -test("buildParser reports missing start rule", function() { - throws( - function() { PEG.buildParser('notStart = "abcd"'); }, - PEG.GrammarError, - { message: "Missing \"start\" rule." } - ); -}); - test("buildParser reports missing referenced rules", function() { var grammars = [ 'start = missing', @@ -645,26 +637,26 @@ test("arithmetics", function() { * Expr ← Sum */ var parser = PEG.buildParser([ - 'Value = digits:[0-9]+ { return parseInt(digits.join("")); }', - ' / "(" expr:Expr ")" { return expr; }', - 'Product = head:Value tail:(("*" / "/") Value)* {', + 'Expr = Sum', + 'Sum = head:Product tail:(("+" / "-") Product)* {', ' var result = head;', ' for (var i = 0; i < tail.length; i++) {', - ' if (tail[i][0] == "*") { result *= tail[i][1]; }', - ' if (tail[i][0] == "/") { result /= tail[i][1]; }', + ' if (tail[i][0] == "+") { result += tail[i][1]; }', + ' if (tail[i][0] == "-") { result -= tail[i][1]; }', ' }', ' return result;', ' }', - 'Sum = head:Product tail:(("+" / "-") Product)* {', + 'Product = head:Value tail:(("*" / "/") Value)* {', ' var result = head;', ' for (var i = 0; i < tail.length; i++) {', - ' if (tail[i][0] == "+") { result += tail[i][1]; }', - ' if (tail[i][0] == "-") { result -= tail[i][1]; }', + ' if (tail[i][0] == "*") { result *= tail[i][1]; }', + ' if (tail[i][0] == "/") { result /= tail[i][1]; }', ' }', ' return result;', ' }', - 'Expr = Sum' - ].join("\n"), "Expr"); + 'Value = digits:[0-9]+ { return parseInt(digits.join("")); }', + ' / "(" expr:Expr ")" { return expr; }' + ].join("\n")); /* Test "value" rule. */ parses(parser, "0", 0); @@ -704,7 +696,7 @@ test("non-context-free language", function() { 'S = &(A "c") a:"a"+ B:B !("a" / "b" / "c") { return a.join("") + B; }', 'A = a:"a" A:A? b:"b" { return a + A + b; }', 'B = b:"b" B:B? c:"c" { return b + B + c; }', - ].join("\n"), "S"); + ].join("\n")); parses(parser, "abc", "abc"); parses(parser, "aaabbbccc", "aaabbbccc"); @@ -725,13 +717,13 @@ test("nested comments", function() { * Z ← any single character */ var parser = PEG.buildParser([ - 'Begin = "(*"', - 'End = "*)"', 'C = begin:Begin ns:N* end:End { return begin + ns.join("") + end; }', 'N = C', ' / !Begin !End z:Z { return z; }', - 'Z = .' - ].join("\n"), "C"); + 'Z = .', + 'Begin = "(*"', + 'End = "*)"' + ].join("\n")); parses(parser, "(**)", "(**)"); parses(parser, "(*abc*)", "(*abc*)"); diff --git a/test/metagrammar-test.js b/test/metagrammar-test.js index 7591418..5420f6e 100644 --- a/test/metagrammar-test.js +++ b/test/metagrammar-test.js @@ -128,7 +128,8 @@ var choiceLiterals = choice([literalAbcd, literalEfgh, literalIjkl]); function oneRuleGrammar(expression) { return { initializer: null, - rules: { start: rule("start", null, expression) } + rules: { start: rule("start", null, expression) }, + startRule: "start" }; } @@ -158,7 +159,8 @@ var initializerGrammar = { initializer: initializer(" code "), rules: { a: rule("a", null, literalAbcd), - } + }, + startRule: "a" }; /* Canonical grammar is "a: \"abcd\"; b: \"efgh\"; c: \"ijkl\";". */ @@ -167,7 +169,8 @@ test("parses grammar", function() { 'a = "abcd"', { initializer: null, - rules: { a: rule("a", null, literalAbcd) } + rules: { a: rule("a", null, literalAbcd) }, + startRule: "a" } ); grammarParserParses('{ code }; a = "abcd"', initializerGrammar); @@ -179,7 +182,8 @@ test("parses grammar", function() { a: rule("a", null, literalAbcd), b: rule("b", null, literalEfgh), c: rule("c", null, literalIjkl) - } + }, + startRule: "a" } ); }); @@ -200,7 +204,8 @@ test("parses rule", function() { 'start "start rule" = "abcd" / "efgh" / "ijkl"', { initializer: null, - rules: { start: rule("start", "start rule", choiceLiterals) } + rules: { start: rule("start", "start rule", choiceLiterals) }, + startRule: "start" } ); grammarParserParses(