Start rule of the grammar is now implicitly its first rule

Before this change, the start rule was the one named "start", and there
was an option to override that. Such an override is no longer possible.

The goal of this change is to keep all the information needed for parser
generation in the grammar itself.

In the future, some override directive for the start rule (like Bison's
"%start") may be added to the grammar.
redux
David Majda 14 years ago
parent 70cf4cd94d
commit 917cf1cf2a

@ -1,4 +1,4 @@
desc "Generate the grammar parser" desc "Generate the grammar parser"
task :metaparser do task :metaparser do
system "bin/pegjs --start-rule grammar PEG.grammarParser lib/metagrammar.pegjs" system "bin/pegjs PEG.grammarParser lib/metagrammar.pegjs"
end end

@ -61,7 +61,6 @@ function printHelp() {
print("omitted, standard input and output are used."); print("omitted, standard input and output are used.");
print(""); print("");
print("Options:"); print("Options:");
print(" -s, --start-rule specify grammar start rule (default: \"start\")");
print(" -v, --version print version information and exit"); print(" -v, --version print version information and exit");
print(" -h, --help print help and exit"); print(" -h, --help print help and exit");
} }
@ -83,8 +82,6 @@ function abort(message) {
exitFailure(); exitFailure();
} }
var startRule = "start";
/* /*
* The trimmed first argument is the script path -- see the beginning of this * The trimmed first argument is the script path -- see the beginning of this
* file. * file.
@ -93,15 +90,6 @@ var args = Array.prototype.slice.call(arguments, 1);
while (args.length > 0 && isOption(args[0])) { while (args.length > 0 && isOption(args[0])) {
switch (args[0]) { switch (args[0]) {
case "-s":
case "--start-rule":
nextArg();
if (args.length === 0) {
abort("Missing parameter of the -s/--start-rule option.");
}
startRule = args[0];
break;
case "-v": case "-v":
case "--version": case "--version":
printVersion(); printVersion();
@ -149,7 +137,7 @@ switch (args.length) {
var input = readFile(inputFile); var input = readFile(inputFile);
try { try {
var parser = PEG.buildParser(input, startRule); var parser = PEG.buildParser(input);
} catch (e) { } catch (e) {
if (e.line !== undefined && e.column !== undefined) { if (e.line !== undefined && e.column !== undefined) {
abort(e.line + ":" + e.column + ": " + e.message); abort(e.line + ":" + e.column + ": " + e.message);

@ -7,24 +7,18 @@
/* no var */ PEG = {}; /* no var */ PEG = {};
/* /*
* Generates a parser from a specified grammar and start rule and returns it. * Generates a parser from a specified grammar and returns it.
* *
* The grammar must be a string in the format described by the metagramar in the * The grammar must be a string in the format described by the metagramar in the
* metagrammar.pegjs file. The start rule may be unspecified, in which case * metagrammar.pegjs file.
* "start" is used.
* *
* Throws |PEG.grammarParser.SyntaxError| if the grammar contains a syntax error * Throws |PEG.grammarParser.SyntaxError| if the grammar contains a syntax error
* or |PEG.GrammarError| if it contains a semantic error. Note that not all * or |PEG.GrammarError| if it contains a semantic error. Note that not all
* errors are detected during the generation and some may protrude to the * errors are detected during the generation and some may protrude to the
* generated parser and cause its malfunction. * generated parser and cause its malfunction.
*/ */
PEG.buildParser = function(grammar, startRule) { PEG.buildParser = function(grammar) {
startRule = startRule || "start"; return PEG.Compiler.compileParser(PEG.grammarParser.parse(grammar));
return PEG.Compiler.compileParser(
PEG.grammarParser.parse(grammar),
startRule
);
}; };
/* ===== PEG.GrammarError ===== */ /* ===== PEG.GrammarError ===== */
@ -262,14 +256,14 @@ PEG.Compiler = {
/* /*
* Checks made on the grammar AST before compilation. Each check is a function * Checks made on the grammar AST before compilation. Each check is a function
* that is passed the AST and start rule and does not return anything. If the * that is passed the AST and does not return anything. If the check passes,
* check passes, the function does not do anything special, otherwise it * the function does not do anything special, otherwise it throws
* throws |PEG.GrammarError|. The checks are run in sequence in order of their * |PEG.GrammarError|. The checks are run in sequence in order of their
* definition. * definition.
*/ */
_checks: [ _checks: [
/* Checks that all referenced rules exist. */ /* Checks that all referenced rules exist. */
function(ast, startRule) { function(ast) {
function nop() {} function nop() {}
function checkExpression(node) { check(node.expression); } function checkExpression(node) { check(node.expression); }
@ -313,17 +307,8 @@ PEG.Compiler = {
} }
}, },
/* Checks that the start rule is defined. */
function(ast, startRule) {
if (typeof(ast.rules[startRule]) === "undefined") {
throw new PEG.GrammarError(
"Missing \"" + startRule + "\" rule."
);
}
},
/* Checks that no left recursion is present. */ /* Checks that no left recursion is present. */
function(ast, startRule) { function(ast) {
function nop() {} function nop() {}
function checkExpression(node, appliedRules) { function checkExpression(node, appliedRules) {
@ -385,15 +370,15 @@ PEG.Compiler = {
/* /*
* Optimalization passes made on the grammar AST before compilation. Each pass * Optimalization passes made on the grammar AST before compilation. Each pass
* is a function that is passed the AST and start rule and returns a new AST * is a function that is passed the AST and returns a new AST. The AST can be
* and start rule. The AST can be modified in-place by the pass. The passes * modified in-place by the pass. The passes are run in sequence in order of
* are run in sequence in order of their definition. * their definition.
*/ */
_passes: [ _passes: [
/* /*
* Removes proxy rules -- that is, rules that only delegate to other rule. * Removes proxy rules -- that is, rules that only delegate to other rule.
*/ */
function(ast, startRule) { function(ast) {
function isProxyRule(node) { function isProxyRule(node) {
return node.type === "rule" && node.expression.type === "rule_ref"; return node.type === "rule" && node.expression.type === "rule_ref";
} }
@ -449,14 +434,14 @@ PEG.Compiler = {
for (var rule in ast.rules) { for (var rule in ast.rules) {
if (isProxyRule(ast.rules[rule])) { if (isProxyRule(ast.rules[rule])) {
replaceRuleRefs(ast, ast.rules[rule].name, ast.rules[rule].expression.name); replaceRuleRefs(ast, ast.rules[rule].name, ast.rules[rule].expression.name);
if (rule === startRule) { if (rule === ast.startRule) {
startRule = ast.rules[rule].expression.name; ast.startRule = ast.rules[rule].expression.name;
} }
delete ast.rules[rule]; delete ast.rules[rule];
} }
} }
return [ast, startRule]; return ast;
} }
], ],
@ -871,20 +856,18 @@ PEG.Compiler = {
}, },
/* /*
* Generates a parser from a specified grammar AST and start rule. Throws * Generates a parser from a specified grammar AST. Throws |PEG.GrammarError|
* |PEG.GrammarError| if the AST contains a semantic error. Note that not all * if the AST contains a semantic error. Note that not all errors are detected
* errors are detected during the generation and some may protrude to the * during the generation and some may protrude to the generated parser and
* generated parser and cause its malfunction. * cause its malfunction.
*/ */
compileParser: function(ast, startRule) { compileParser: function(ast) {
for (var i = 0; i < this._checks.length; i++) { for (var i = 0; i < this._checks.length; i++) {
this._checks[i](ast, startRule); this._checks[i](ast);
} }
for (var i = 0; i < this._passes.length; i++) { for (var i = 0; i < this._passes.length; i++) {
var newAstNadStartRule = this._passes[i](ast, startRule); ast = this._passes[i](ast);
ast = newAstNadStartRule[0];
startRule = newAstNadStartRule[1];
} }
var initializerCode = ast.initializer !== null var initializerCode = ast.initializer !== null
@ -1078,7 +1061,7 @@ PEG.Compiler = {
{ {
initializerCode: initializerCode, initializerCode: initializerCode,
parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"), parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
startRule: startRule startRule: ast.startRule
} }
); );

@ -106,7 +106,8 @@ PEG.grammarParser = (function(){
return { return {
initializer: initializer !== "" ? initializer : null, initializer: initializer !== "" ? initializer : null,
rules: rulesConverted rules: rulesConverted,
startRule: rules[0].name
} }
})(result1[1], result1[2]) })(result1[1], result1[2])
: null; : null;

@ -5,7 +5,8 @@ grammar
return { return {
initializer: initializer !== "" ? initializer : null, initializer: initializer !== "" ? initializer : null,
rules: rulesConverted rules: rulesConverted,
startRule: rules[0].name
} }
} }

@ -205,14 +205,6 @@ test("buildParser reports syntax errors in the grammar", function() {
); );
}); });
test("buildParser reports missing start rule", function() {
throws(
function() { PEG.buildParser('notStart = "abcd"'); },
PEG.GrammarError,
{ message: "Missing \"start\" rule." }
);
});
test("buildParser reports missing referenced rules", function() { test("buildParser reports missing referenced rules", function() {
var grammars = [ var grammars = [
'start = missing', 'start = missing',
@ -645,26 +637,26 @@ test("arithmetics", function() {
* Expr Sum * Expr Sum
*/ */
var parser = PEG.buildParser([ var parser = PEG.buildParser([
'Value = digits:[0-9]+ { return parseInt(digits.join("")); }', 'Expr = Sum',
' / "(" expr:Expr ")" { return expr; }', 'Sum = head:Product tail:(("+" / "-") Product)* {',
'Product = head:Value tail:(("*" / "/") Value)* {',
' var result = head;', ' var result = head;',
' for (var i = 0; i < tail.length; i++) {', ' for (var i = 0; i < tail.length; i++) {',
' if (tail[i][0] == "*") { result *= tail[i][1]; }', ' if (tail[i][0] == "+") { result += tail[i][1]; }',
' if (tail[i][0] == "/") { result /= tail[i][1]; }', ' if (tail[i][0] == "-") { result -= tail[i][1]; }',
' }', ' }',
' return result;', ' return result;',
' }', ' }',
'Sum = head:Product tail:(("+" / "-") Product)* {', 'Product = head:Value tail:(("*" / "/") Value)* {',
' var result = head;', ' var result = head;',
' for (var i = 0; i < tail.length; i++) {', ' for (var i = 0; i < tail.length; i++) {',
' if (tail[i][0] == "+") { result += tail[i][1]; }', ' if (tail[i][0] == "*") { result *= tail[i][1]; }',
' if (tail[i][0] == "-") { result -= tail[i][1]; }', ' if (tail[i][0] == "/") { result /= tail[i][1]; }',
' }', ' }',
' return result;', ' return result;',
' }', ' }',
'Expr = Sum' 'Value = digits:[0-9]+ { return parseInt(digits.join("")); }',
].join("\n"), "Expr"); ' / "(" expr:Expr ")" { return expr; }'
].join("\n"));
/* Test "value" rule. */ /* Test "value" rule. */
parses(parser, "0", 0); parses(parser, "0", 0);
@ -704,7 +696,7 @@ test("non-context-free language", function() {
'S = &(A "c") a:"a"+ B:B !("a" / "b" / "c") { return a.join("") + B; }', 'S = &(A "c") a:"a"+ B:B !("a" / "b" / "c") { return a.join("") + B; }',
'A = a:"a" A:A? b:"b" { return a + A + b; }', 'A = a:"a" A:A? b:"b" { return a + A + b; }',
'B = b:"b" B:B? c:"c" { return b + B + c; }', 'B = b:"b" B:B? c:"c" { return b + B + c; }',
].join("\n"), "S"); ].join("\n"));
parses(parser, "abc", "abc"); parses(parser, "abc", "abc");
parses(parser, "aaabbbccc", "aaabbbccc"); parses(parser, "aaabbbccc", "aaabbbccc");
@ -725,13 +717,13 @@ test("nested comments", function() {
* Z any single character * Z any single character
*/ */
var parser = PEG.buildParser([ var parser = PEG.buildParser([
'Begin = "(*"',
'End = "*)"',
'C = begin:Begin ns:N* end:End { return begin + ns.join("") + end; }', 'C = begin:Begin ns:N* end:End { return begin + ns.join("") + end; }',
'N = C', 'N = C',
' / !Begin !End z:Z { return z; }', ' / !Begin !End z:Z { return z; }',
'Z = .' 'Z = .',
].join("\n"), "C"); 'Begin = "(*"',
'End = "*)"'
].join("\n"));
parses(parser, "(**)", "(**)"); parses(parser, "(**)", "(**)");
parses(parser, "(*abc*)", "(*abc*)"); parses(parser, "(*abc*)", "(*abc*)");

@ -128,7 +128,8 @@ var choiceLiterals = choice([literalAbcd, literalEfgh, literalIjkl]);
function oneRuleGrammar(expression) { function oneRuleGrammar(expression) {
return { return {
initializer: null, initializer: null,
rules: { start: rule("start", null, expression) } rules: { start: rule("start", null, expression) },
startRule: "start"
}; };
} }
@ -158,7 +159,8 @@ var initializerGrammar = {
initializer: initializer(" code "), initializer: initializer(" code "),
rules: { rules: {
a: rule("a", null, literalAbcd), a: rule("a", null, literalAbcd),
} },
startRule: "a"
}; };
/* Canonical grammar is "a: \"abcd\"; b: \"efgh\"; c: \"ijkl\";". */ /* Canonical grammar is "a: \"abcd\"; b: \"efgh\"; c: \"ijkl\";". */
@ -167,7 +169,8 @@ test("parses grammar", function() {
'a = "abcd"', 'a = "abcd"',
{ {
initializer: null, initializer: null,
rules: { a: rule("a", null, literalAbcd) } rules: { a: rule("a", null, literalAbcd) },
startRule: "a"
} }
); );
grammarParserParses('{ code }; a = "abcd"', initializerGrammar); grammarParserParses('{ code }; a = "abcd"', initializerGrammar);
@ -179,7 +182,8 @@ test("parses grammar", function() {
a: rule("a", null, literalAbcd), a: rule("a", null, literalAbcd),
b: rule("b", null, literalEfgh), b: rule("b", null, literalEfgh),
c: rule("c", null, literalIjkl) c: rule("c", null, literalIjkl)
} },
startRule: "a"
} }
); );
}); });
@ -200,7 +204,8 @@ test("parses rule", function() {
'start "start rule" = "abcd" / "efgh" / "ijkl"', 'start "start rule" = "abcd" / "efgh" / "ijkl"',
{ {
initializer: null, initializer: null,
rules: { start: rule("start", "start rule", choiceLiterals) } rules: { start: rule("start", "start rule", choiceLiterals) },
startRule: "start"
} }
); );
grammarParserParses( grammarParserParses(

Loading…
Cancel
Save