Move param computations from the emitter into a separate pass

This has two main benefits: 1. The knowledge about scoping params is now in one designated place, making all future adjustments in this area easier. 2. Action-related code no longer handles sequences specially. Such knowledge/behavior doesn't belong there.
13 years ago · 4cf50bcf9f
parent efc38eef9b
commit 4cf50bcf9f
5 changed files with 267 additions and 44 deletions
--- a/src/compiler.js
+++ b/src/compiler.js
@ -7,7 +7,8 @@ PEG.compiler = {
    "reportMissingRules",
    "reportLeftRecursion",
    "removeProxyRules",
-    "computeVarNames"
+    "computeVarNames",
+    "computeParams"
  ],

  /*
--- a/src/emitter.js
+++ b/src/emitter.js
@ -596,7 +596,7 @@ PEG.compiler.emitter = function(ast) {
            '#{node.posVar} = pos;',
            '#block emit(node.expression)',
            'if (#{node.resultVar} !== null) {',
-            '  #{node.resultVar} = (function(#{formalParams.join(", ")}) {#{node.code}})(#{actualParams.join(", ")});',
+            '  #{node.resultVar} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{values(node.params).join(", ")});',
            '}',
            'if (#{node.resultVar} === null) {',
            '  pos = #{node.posVar};',
@ -674,6 +674,8 @@ PEG.compiler.emitter = function(ast) {
  function fill(name, vars) {
    vars.string = quote;
    vars.pluck  = pluck;
+    vars.keys   = keys;
+    vars.values = values;
    vars.emit   = emit;

    return templates[name](vars);
@ -770,48 +772,10 @@ PEG.compiler.emitter = function(ast) {
    optional:     emitSimple("optional"),
    zero_or_more: emitSimple("zero_or_more"),
    one_or_more:  emitSimple("one_or_more"),
-
-    action: function(node) {
-      /*
-       * In case of sequences, we splat their elements into function arguments
-       * one by one. Example:
-       *
-       *   start: a:"a" b:"b" c:"c" { alert(arguments.length) }  // => 3
-       *
-       * This behavior is reflected in this function.
-       */
-
-      var formalParams;
-      var actualParams;
-
-      if (node.expression.type === "sequence") {
-        formalParams = [];
-        actualParams = [];
-
-        each(node.expression.elements, function(element, i) {
-          if (element.type === "labeled") {
-            formalParams.push(element.label);
-            actualParams.push(node.resultVar + '[' + i + ']');
-          }
-        });
-      } else if (node.expression.type === "labeled") {
-        formalParams = [node.expression.label];
-        actualParams = [node.resultVar];
-      } else {
-        formalParams = [];
-        actualParams = [];
-      }
-
-      return fill("action", {
-        node:           node,
-        formalParams:   formalParams,
-        actualParams:   actualParams
-      });
-    },
-
-    rule_ref: emitSimple("rule_ref"),
-    literal:  emitSimple("literal"),
-    any:      emitSimple("any"),
+    action:       emitSimple("action"),
+    rule_ref:     emitSimple("rule_ref"),
+    literal:      emitSimple("literal"),
+    any:          emitSimple("any"),

    "class": function(node) {
      var regexp;
--- a/src/passes.js
+++ b/src/passes.js
@ -314,5 +314,107 @@ PEG.compiler.passes = {
    });

    compute(ast, { result: 0, pos: 0 });
+  },
+
+  /*
+   * This pass walks through the AST and tracks what labels are visible at each
+   * point. For "action" nodes it computes parameter names and values for the
+   * function used in generated code. (In the emitter, user's code is wrapped
+   * into a function that is immediately executed. Its parameter names
+   * correspond to visible labels and its parameter values to their captured
+   * values). Implicitly, this pass defines scoping rules for labels.
+   *
+   * After running this pass, all "action" nodes will have a |params| property
+   * containing an object mapping parameter names to the expressions that will
+   * be used as their values.
+   */
+  computeParams: function(ast) {
+    var envs = [];
+
+    function scoped(f) {
+      envs.push({});
+      f();
+      envs.pop();
+    }
+
+    function nop() {}
+
+    function computeForScopedExpression(node) {
+      scoped(function() { compute(node.expression); });
+    }
+
+    var compute = buildNodeVisitor({
+      grammar:
+        function(node) {
+          var name;
+
+          for (name in node.rules) {
+            compute(node.rules[name]);
+          }
+        },
+
+      rule:         computeForScopedExpression,
+
+      choice:
+        function(node) {
+          scoped(function() { each(node.alternatives, compute); });
+        },
+
+      sequence:
+        function(node) {
+          var env = envs[envs.length - 1], name;
+
+          function fixup(name) {
+            each(pluck(node.elements, "resultVar"), function(resultVar, i) {
+              if (env[name].substr(0, resultVar.length) === resultVar) {
+                env[name] = node.resultVar + "[" + i + "]"
+                          + env[name].substr(resultVar.length);
+              }
+            });
+          }
+
+          each(node.elements, compute);
+
+          for (name in env) {
+            fixup(name);
+          }
+        },
+
+      labeled:
+        function(node) {
+          envs[envs.length - 1][node.label] = node.resultVar;
+
+          scoped(function() { compute(node.expression); });
+        },
+
+      simple_and:   computeForScopedExpression,
+      simple_not:   computeForScopedExpression,
+      semantic_and: nop,
+      semantic_not: nop,
+      optional:     computeForScopedExpression,
+      zero_or_more: computeForScopedExpression,
+      one_or_more:  computeForScopedExpression,
+
+      action:
+        function(node) {
+          scoped(function() {
+            var env = envs[envs.length - 1], params = {}, name;
+
+            compute(node.expression);
+
+            for (name in env) {
+              params[name] = env[name];
+            }
+            node.params = params;
+          });
+        },
+
+      rule_ref:     nop,
+      literal:      nop,
+      any:          nop,
+      "class":      nop
+    });
+
+    compute(ast);
  }
 };
--- a/src/utils.js
+++ b/src/utils.js
@ -46,6 +46,22 @@ function pluck(array, key) {
  return map(array, function (e) { return e[key]; });
 }

+function keys(object) {
+  var result = [];
+  for (var key in object) {
+    result.push(key);
+  }
+  return result;
+}
+
+function values(object) {
+  var result = [];
+  for (var key in object) {
+    result.push(object[key]);
+  }
+  return result;
+}
+
 /*
 * Returns a string padded on the left to a desired length with a character.
 *
--- a/test/passes-test.js
+++ b/test/passes-test.js
@ -442,4 +442,144 @@ test("computes variable names", function() {
  }
 });

+test("computes params", function() {
+  function extractNode(node)       { return node; }
+  function extractExpression(node) { return node.expression; }
+
+  var cases = [
+    /* Recursive walk */
+    {
+      grammar:   'start = a:"a" { }',
+      extractor: extractNode,
+      params:    { a: "result0" }
+    },
+    {
+      grammar:   'start = a:"a" { } / "b" / "c"',
+      extractor: function(node) { return node.alternatives[0]; },
+      params:    { a: "result0" }
+    },
+    {
+      grammar:   'start = "a" / "b" / c:"c" { }',
+      extractor: function(node) { return node.alternatives[2]; },
+      params:    { c: "result0" }
+    },
+    {
+      grammar:   'start = (a:"a" { }) "b" "c"',
+      extractor: function(node) { return node.elements[0]; },
+      params:    { a: "result0" }
+    },
+    {
+      grammar:   'start = "a" "b" (c:"c" { })',
+      extractor: function(node) { return node.elements[2]; },
+      params:    { c: "result2" }
+    },
+    {
+      grammar:   'start = a:(b:"b" { })',
+      extractor: extractExpression,
+      params:    { b: "result0" }
+    },
+    {
+      grammar:   'start = &(a:"a" { })',
+      extractor: extractExpression,
+      params:    { a: "result0" }
+    },
+    {
+      grammar:   'start = !(a:"a" { })',
+      extractor: extractExpression,
+      params:    { a: "result0" }
+    },
+    {
+      grammar:   'start = (a:"a" { })?',
+      extractor: extractExpression,
+      params:    { a: "result0" }
+    },
+    {
+      grammar:   'start = (a:"a" { })*',
+      extractor: extractExpression,
+      params:    { a: "result1" }
+    },
+    {
+      grammar:   'start = (a:"a" { })+',
+      extractor: extractExpression,
+      params:    { a: "result1" }
+    },
+    {
+      grammar:   'start = (a:"a" { }) { }',
+      extractor: extractExpression,
+      params:    { a: "result0" }
+    },
+
+    /* Scoping */
+    {
+      grammar:   'start = (a:"a" / b:"b" / c:"c") { }',
+      extractor: extractNode,
+      params:    { }
+    },
+    {
+      grammar:   'start = a:(b:"b") { }',
+      extractor: extractNode,
+      params:    { a: "result0" }
+    },
+    {
+      grammar:   'start = &(a:"a") { }',
+      extractor: extractNode,
+      params:    { }
+    },
+    {
+      grammar:   'start = !(a:"a") { }',
+      extractor: extractNode,
+      params:    { }
+    },
+    {
+      grammar:   'start = (a:"a")? { }',
+      extractor: extractNode,
+      params:    { }
+    },
+    {
+      grammar:   'start = (a:"a")* { }',
+      extractor: extractNode,
+      params:    { }
+    },
+    {
+      grammar:   'start = (a:"a")+ { }',
+      extractor: extractNode,
+      params:    { }
+    },
+    {
+      grammar:   'start = (a:"a" { }) { }',
+      extractor: extractNode,
+      params:    { }
+    },
+
+    /* Sequences */
+    {
+      grammar:   'start = a:"a" b:"b" c:"c" { }',
+      extractor: extractNode,
+      params:    { a: "result0[0]", b: "result0[1]", c: "result0[2]" }
+    },
+    {
+      grammar:   'start = a:"a" (b:"b" c:"c" d:"d") e:"e"{ }',
+      extractor: extractNode,
+      params:    {
+        a: "result0[0]",
+        b: "result0[1][0]",
+        c: "result0[1][1]",
+        d: "result0[1][2]",
+        e: "result0[2]"
+      }
+    }
+  ];
+
+  for (var i = 0; i < cases.length; i++) {
+    var ast = PEG.parser.parse(cases[i].grammar);
+    PEG.compiler.passes.computeVarNames(ast);
+    PEG.compiler.passes.computeParams(ast);
+
+    deepEqual(
+      cases[i].extractor(ast.rules["start"].expression).params,
+      cases[i].params
+    );
+  }
+});
+
 })();