Start rule of the grammar is now implicitly its first rule

Before this change, the start rule was the one named "start", and there was an option to override that. Overriding the start rule is no longer possible. The goal of this change is to keep all the information needed for parser generation in the grammar itself. In the future, a directive for overriding the start rule (like Bison's "%start") may be added to the grammar.
14 years ago · 917cf1cf2a
parent 70cf4cd94d
commit 917cf1cf2a
7 changed files with 55 additions and 85 deletions
--- a/2
+++ b/2
@ -1,4 +1,4 @@
 desc "Generate the grammar parser"
 task :metaparser do
-  system "bin/pegjs --start-rule grammar PEG.grammarParser lib/metagrammar.pegjs"
+  system "bin/pegjs PEG.grammarParser lib/metagrammar.pegjs"
 end
--- a/bin/pegjs-main.js
+++ b/bin/pegjs-main.js
@ -61,7 +61,6 @@ function printHelp() {
  print("omitted, standard input and output are used.");
  print("");
  print("Options:");
  print("  -s, --start-rule    specify grammar start rule (default: \"start\")");
  print("  -v, --version       print version information and exit");
  print("  -h, --help          print help and exit");
 }
@ -83,8 +82,6 @@ function abort(message) {
  exitFailure();
 }
 var startRule = "start";
 /*
 * The trimmed first argument is the script path -- see the beginning of this
 * file.
@ -93,15 +90,6 @@ var args = Array.prototype.slice.call(arguments, 1);
 while (args.length > 0 && isOption(args[0])) {
  switch (args[0]) {
    case "-s":
    case "--start-rule":
      nextArg();
      if (args.length === 0) {
        abort("Missing parameter of the -s/--start-rule option.");
      }
      startRule = args[0];
      break;
    case "-v":
    case "--version":
      printVersion();
@ -149,7 +137,7 @@ switch (args.length) {
 var input = readFile(inputFile);
 try {
-  var parser = PEG.buildParser(input, startRule);
+  var parser = PEG.buildParser(input);
 } catch (e) {
  if (e.line !== undefined && e.column !== undefined) {
    abort(e.line + ":" + e.column + ": " + e.message);
--- a/lib/compiler.js
+++ b/lib/compiler.js
@ -7,24 +7,18 @@
 /* no var */ PEG = {};
 /*
- * Generates a parser from a specified grammar and start rule and returns it.
+ * Generates a parser from a specified grammar and returns it.
 *
 * The grammar must be a string in the format described by the metagrammar in the
- * metagrammar.pegjs file. The start rule may be unspecified, in which case
+ * metagrammar.pegjs file.
- * "start" is used.
 *
 * Throws |PEG.grammarParser.SyntaxError| if the grammar contains a syntax error
 * or |PEG.GrammarError| if it contains a semantic error. Note that not all
 * errors are detected during the generation and some may protrude to the
 * generated parser and cause its malfunction.
 */
-PEG.buildParser = function(grammar, startRule) {
+PEG.buildParser = function(grammar) {
-  startRule = startRule || "start";
+  return PEG.Compiler.compileParser(PEG.grammarParser.parse(grammar));
  return PEG.Compiler.compileParser(
    PEG.grammarParser.parse(grammar),
    startRule
  );
 };
 /* ===== PEG.GrammarError ===== */
@ -262,14 +256,14 @@ PEG.Compiler = {
  /*
   * Checks made on the grammar AST before compilation. Each check is a function
-   * that is passed the AST and start rule and does not return anything. If the
+   * that is passed the AST and does not return anything. If the check passes,
-   * check passes, the function does not do anything special, otherwise it
+   * the function does not do anything special, otherwise it throws
-   * throws |PEG.GrammarError|. The checks are run in sequence in order of their
+   * |PEG.GrammarError|. The checks are run in sequence in order of their
   * definition.
   */
  _checks: [
    /* Checks that all referenced rules exist. */
-    function(ast, startRule) {
+    function(ast) {
      function nop() {}
      function checkExpression(node) { check(node.expression); }
@ -313,17 +307,8 @@ PEG.Compiler = {
      }
    },
    /* Checks that the start rule is defined. */
    function(ast, startRule) {
      if (typeof(ast.rules[startRule]) === "undefined") {
        throw new PEG.GrammarError(
          "Missing \"" + startRule + "\" rule."
        );
      }
    },
    /* Checks that no left recursion is present. */
-    function(ast, startRule) {
+    function(ast) {
      function nop() {}
      function checkExpression(node, appliedRules) {
@ -385,15 +370,15 @@ PEG.Compiler = {
  /*
   * Optimization passes made on the grammar AST before compilation. Each pass
-   * is a function that is passed the AST and start rule and returns a new AST
+   * is a function that is passed the AST and returns a new AST. The AST can be
-   * and start rule. The AST can be modified in-place by the pass. The passes
+   * modified in-place by the pass. The passes are run in sequence in order of
-   * are run in sequence in order of their definition.
+   * their definition.
   */
  _passes: [
    /*
     * Removes proxy rules -- that is, rules that only delegate to another rule.
     */
-    function(ast, startRule) {
+    function(ast) {
      function isProxyRule(node) {
        return node.type === "rule" && node.expression.type === "rule_ref";
      }
@ -449,14 +434,14 @@ PEG.Compiler = {
      for (var rule in ast.rules) {
        if (isProxyRule(ast.rules[rule])) {
          replaceRuleRefs(ast, ast.rules[rule].name, ast.rules[rule].expression.name);
-          if (rule === startRule) {
+          if (rule === ast.startRule) {
-            startRule = ast.rules[rule].expression.name;
+            ast.startRule = ast.rules[rule].expression.name;
          }
          delete ast.rules[rule];
        }
      }
-      return [ast, startRule];
+      return ast;
    }
  ],
@ -871,20 +856,18 @@ PEG.Compiler = {
  },
  /*
-   * Generates a parser from a specified grammar AST and start rule. Throws
+   * Generates a parser from a specified grammar AST. Throws |PEG.GrammarError|
-   * |PEG.GrammarError| if the AST contains a semantic error. Note that not all
+   * if the AST contains a semantic error. Note that not all errors are detected
-   * errors are detected during the generation and some may protrude to the
+   * during the generation and some may protrude to the generated parser and
-   * generated parser and cause its malfunction.
+   * cause its malfunction.
   */
-  compileParser: function(ast, startRule) {
+  compileParser: function(ast) {
    for (var i = 0; i < this._checks.length; i++) {
-      this._checks[i](ast, startRule);
+      this._checks[i](ast);
    }
    for (var i = 0; i < this._passes.length; i++) {
-      var newAstNadStartRule = this._passes[i](ast, startRule);
+      ast = this._passes[i](ast);
      ast       = newAstNadStartRule[0];
      startRule = newAstNadStartRule[1];
    }
    var initializerCode = ast.initializer !== null
@ -1078,7 +1061,7 @@ PEG.Compiler = {
      {
        initializerCode:          initializerCode,
        parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"),
-        startRule:                startRule
+        startRule:                ast.startRule
      }
    );
--- a/lib/metagrammar.js
+++ b/lib/metagrammar.js
@ -106,7 +106,8 @@ PEG.grammarParser = (function(){
                return {
                  initializer: initializer !== "" ? initializer : null,
-                  rules:       rulesConverted
+                  rules:       rulesConverted,
                  startRule:   rules[0].name
                }
              })(result1[1], result1[2])
          : null;
--- a/lib/metagrammar.pegjs
+++ b/lib/metagrammar.pegjs
@ -5,7 +5,8 @@ grammar
      return {
        initializer: initializer !== "" ? initializer : null,
-        rules:       rulesConverted
+        rules:       rulesConverted,
        startRule:   rules[0].name
      }
    }
--- a/test/compiler-test.js
+++ b/test/compiler-test.js
@ -205,14 +205,6 @@ test("buildParser reports syntax errors in the grammar", function() {
  );
 });
 test("buildParser reports missing start rule", function() {
  throws(
    function() { PEG.buildParser('notStart = "abcd"'); },
    PEG.GrammarError,
    { message: "Missing \"start\" rule." }
  );
 });
 test("buildParser reports missing referenced rules", function() {
  var grammars = [
    'start = missing',
@ -645,26 +637,26 @@ test("arithmetics", function() {
   * Expr    ← Sum
   */
  var parser = PEG.buildParser([
-    'Value   = digits:[0-9]+     { return parseInt(digits.join("")); }',
+    'Expr    = Sum',
-    '        / "(" expr:Expr ")" { return expr; }',
+    'Sum     = head:Product tail:(("+" / "-") Product)* {',
    'Product = head:Value tail:(("*" / "/") Value)* {',
    '            var result = head;',
    '            for (var i = 0; i < tail.length; i++) {',
-    '              if (tail[i][0] == "*") { result *= tail[i][1]; }',
+    '              if (tail[i][0] == "+") { result += tail[i][1]; }',
-    '              if (tail[i][0] == "/") { result /= tail[i][1]; }',
+    '              if (tail[i][0] == "-") { result -= tail[i][1]; }',
    '            }',
    '            return result;',
    '          }',
-    'Sum     = head:Product tail:(("+" / "-") Product)* {',
+    'Product = head:Value tail:(("*" / "/") Value)* {',
    '            var result = head;',
    '            for (var i = 0; i < tail.length; i++) {',
-    '              if (tail[i][0] == "+") { result += tail[i][1]; }',
+    '              if (tail[i][0] == "*") { result *= tail[i][1]; }',
-    '              if (tail[i][0] == "-") { result -= tail[i][1]; }',
+    '              if (tail[i][0] == "/") { result /= tail[i][1]; }',
    '            }',
    '            return result;',
    '          }',
-    'Expr    = Sum'
+    'Value   = digits:[0-9]+     { return parseInt(digits.join("")); }',
-  ].join("\n"), "Expr");
+    '        / "(" expr:Expr ")" { return expr; }'
  ].join("\n"));
  /* Test "value" rule. */
  parses(parser, "0", 0);
@ -704,7 +696,7 @@ test("non-context-free language", function() {
    'S = &(A "c") a:"a"+ B:B !("a" / "b" / "c") { return a.join("") + B; }',
    'A = a:"a" A:A? b:"b" { return a + A + b; }',
    'B = b:"b" B:B? c:"c" { return b + B + c; }',
-  ].join("\n"), "S");
+  ].join("\n"));
  parses(parser, "abc", "abc");
  parses(parser, "aaabbbccc", "aaabbbccc");
@ -725,13 +717,13 @@ test("nested comments", function() {
   * Z ← any single character
   */
  var parser = PEG.buildParser([
    'Begin = "(*"',
    'End   = "*)"',
    'C     = begin:Begin ns:N* end:End { return begin + ns.join("") + end; }',
    'N     = C',
    '      / !Begin !End z:Z { return z; }',
-    'Z     = .'
+    'Z     = .',
-  ].join("\n"), "C");
+    'Begin = "(*"',
    'End   = "*)"'
  ].join("\n"));
  parses(parser, "(**)", "(**)");
  parses(parser, "(*abc*)", "(*abc*)");
--- a/test/metagrammar-test.js
+++ b/test/metagrammar-test.js
@ -128,7 +128,8 @@ var choiceLiterals = choice([literalAbcd, literalEfgh, literalIjkl]);
 function oneRuleGrammar(expression) {
  return {
    initializer: null,
-    rules:       { start: rule("start", null, expression) }
+    rules:       { start: rule("start", null, expression) },
    startRule:   "start"
  };
 }
@ -158,7 +159,8 @@ var initializerGrammar = {
  initializer: initializer(" code "),
  rules: {
    a: rule("a", null, literalAbcd),
-  }
+  },
  startRule:   "a"
 };
 /* Canonical grammar is "a: \"abcd\"; b: \"efgh\"; c: \"ijkl\";". */
@ -167,7 +169,8 @@ test("parses grammar", function() {
    'a = "abcd"',
    {
      initializer: null,
-      rules:       { a: rule("a", null, literalAbcd) }
+      rules:       { a: rule("a", null, literalAbcd) },
      startRule:   "a"
    }
  );
  grammarParserParses('{ code }; a = "abcd"', initializerGrammar);
@ -179,7 +182,8 @@ test("parses grammar", function() {
        a: rule("a", null, literalAbcd),
        b: rule("b", null, literalEfgh),
        c: rule("c", null, literalIjkl)
-      }
+      },
      startRule:   "a"
    }
  );
 });
@ -200,7 +204,8 @@ test("parses rule", function() {
    'start "start rule" = "abcd" / "efgh" / "ijkl"',
    {
      initializer: null,
-      rules:       { start: rule("start", "start rule", choiceLiterals) }
+      rules:       { start: rule("start", "start rule", choiceLiterals) },
      startRule:   "start"
    }
  );
  grammarParserParses(