Semantic predicates now have access to preceding labels

Part of a fix for GH-69.
12 years ago · a2af1fe612
parent 4cf50bcf9f
commit a2af1fe612
5 changed files with 110 additions and 22 deletions
--- a/README.md
+++ b/README.md
@ -168,13 +168,13 @@ Try to match the expression and. If the match does not succeed, just return an e

 #### & { *predicate* }

-The predicate is a piece of JavaScript code that is executed as if it was inside a function. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `true` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed.
+The predicate is a piece of JavaScript code that is executed as if it was inside a function. It gets the match results of labeled expressions in the preceding expressions as its arguments. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `true` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed.

 The code inside the predicate has access to all variables and functions defined in the initializer at the beginning of the grammar. Curly braces in the predicate code must be balanced.

 #### ! { *predicate* }

-The predicate is a piece of JavaScript code that is executed as if it was inside a function. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `false` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed.
+The predicate is a piece of JavaScript code that is executed as if it was inside a function. It gets the match results of labeled expressions in the preceding expressions as its arguments. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `false` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed.

 The code inside the predicate has access to all variables and functions defined in the initializer at the beginning of the grammar. Curly braces in the predicate code must be balanced.

--- a/src/emitter.js
+++ b/src/emitter.js
@ -563,10 +563,10 @@ PEG.compiler.emitter = function(ast) {
            '}'
          ],
          semantic_and: [
-            '#{node.resultVar} = (function() {#{node.code}})() ? "" : null;'
+            '#{node.resultVar} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{values(node.params).join(", ")}) ? "" : null;'
          ],
          semantic_not: [
-            '#{node.resultVar} = (function() {#{node.code}})() ? null : "";'
+            '#{node.resultVar} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{values(node.params).join(", ")}) ? null : "";'
          ],
          optional: [
            '#block emit(node.expression)',
--- a/src/passes.js
+++ b/src/passes.js
@ -318,15 +318,16 @@ PEG.compiler.passes = {

  /*
   * This pass walks through the AST and tracks what labels are visible at each
-   * point. For "action" nodes it computes parameter names and values for the
-   * function used in generated code. (In the emitter, user's code is wrapped
-   * into a function that is immediately executed. Its parameter names
-   * correspond to visible labels and its parameter values to their captured
-   * values). Implicitly, this pass defines scoping rules for labels.
+   * point. For "action", "semantic_and" and "semantic_not" nodes it computes
+   * parameter names and values for the function used in generated code. (In the
+   * emitter, user's code is wrapped into a function that is immediately
+   * executed. Its parameter names correspond to visible labels and its
+   * parameter values to their captured values). Implicitly, this pass defines
+   * scoping rules for labels.
   *
-   * After running this pass, all "action" nodes will have a |params| property
-   * containing an object mapping parameter names to the expressions that will
-   * be used as their values.
+   * After running this pass, all "action", "semantic_and" and "semantic_not"
+   * nodes will have a |params| property containing an object mapping parameter
+   * names to the expressions that will be used as their values.
   */
  computeParams: function(ast) {
    var envs = [];
@ -343,6 +344,15 @@ PEG.compiler.passes = {
      scoped(function() { compute(node.expression); });
    }

+    function computeParams(node) {
+      var env = envs[envs.length - 1], params = {}, name;
+
+      for (name in env) {
+        params[name] = env[name];
+      }
+      node.params = params;
+    }
+
    var compute = buildNodeVisitor({
      grammar:
        function(node) {
@ -389,8 +399,8 @@ PEG.compiler.passes = {

      simple_and:   computeForScopedExpression,
      simple_not:   computeForScopedExpression,
-      semantic_and: nop,
-      semantic_not: nop,
+      semantic_and: computeParams,
+      semantic_not: computeParams,
      optional:     computeForScopedExpression,
      zero_or_more: computeForScopedExpression,
      one_or_more:  computeForScopedExpression,
@ -398,14 +408,8 @@ PEG.compiler.passes = {
      action:
        function(node) {
          scoped(function() {
-            var env = envs[envs.length - 1], params = {}, name;
-
            compute(node.expression);
-
-            for (name in env) {
-              params[name] = env[name];
-            }
-            node.params = params;
+            computeParams(node);
          });
        },

--- a/test/compiler-test.js
+++ b/test/compiler-test.js
@ -68,6 +68,42 @@ test("semantic and", function() {

  var rejectingParser = PEG.buildParser('start = "a" &{ return false; } "b"');
  doesNotParse(rejectingParser, "ab");
+
+  var singleElementUnlabeledParser = PEG.buildParser(
+    'start = "a" &{ return arguments.length === 0; }'
+  );
+  parses(singleElementUnlabeledParser, "a", ["a", ""]);
+
+  var singleElementLabeledParser = PEG.buildParser(
+    'start = a:"a" &{ return arguments.length === 1 && a === "a"; }'
+  );
+  parses(singleElementLabeledParser, "a", ["a", ""]);
+
+  var multiElementUnlabeledParser = PEG.buildParser(
+    'start = "a" "b" "c" &{ return arguments.length === 0; }'
+  );
+  parses(multiElementUnlabeledParser, "abc", ["a", "b", "c", ""]);
+
+  var multiElementLabeledParser = PEG.buildParser([
+    'start = a:"a" "b" c:"c"',
+    '        &{ return arguments.length === 2 && a === "a" && c === "c"; }'
+  ].join("\n"));
+  parses(multiElementLabeledParser, "abc", ["a", "b", "c", ""]);
+
+  var innerElementsUnlabeledParser = PEG.buildParser(
+    'start = "a" ("b" "c" "d" &{ return arguments.length === 0; }) "e"'
+  );
+  parses(innerElementsUnlabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]);
+
+  var innerElementsLabeledParser = PEG.buildParser([
+    'start = "a"',
+    '        (',
+    '          b:"b" "c" d:"d"',
+    '          &{ return arguments.length === 2 && b === "b" && d === "d"; }',
+    '        )',
+    '        "e"'
+  ].join("\n"));
+  parses(innerElementsLabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]);
 });

 test("semantic not", function() {
@ -76,6 +112,42 @@ test("semantic not", function() {

  var rejectingParser = PEG.buildParser('start = "a" !{ return true; } "b"');
  doesNotParse(rejectingParser, "ab");
+
+  var singleElementUnlabeledParser = PEG.buildParser(
+    'start = "a" !{ return arguments.length !== 0; }'
+  );
+  parses(singleElementUnlabeledParser, "a", ["a", ""]);
+
+  var singleElementLabeledParser = PEG.buildParser(
+    'start = a:"a" !{ return arguments.length !== 1 || a !== "a"; }'
+  );
+  parses(singleElementLabeledParser, "a", ["a", ""]);
+
+  var multiElementUnlabeledParser = PEG.buildParser(
+    'start = "a" "b" "c" !{ return arguments.length !== 0; }'
+  );
+  parses(multiElementUnlabeledParser, "abc", ["a", "b", "c", ""]);
+
+  var multiElementLabeledParser = PEG.buildParser([
+    'start = a:"a" "b" c:"c"',
+    '        !{ return arguments.length !== 2 || a !== "a" || c !== "c"; }'
+  ].join("\n"));
+  parses(multiElementLabeledParser, "abc", ["a", "b", "c", ""]);
+
+  var innerElementsUnlabeledParser = PEG.buildParser(
+    'start = "a" ("b" "c" "d" !{ return arguments.length !== 0; }) "e"'
+  );
+  parses(innerElementsUnlabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]);
+
+  var innerElementsLabeledParser = PEG.buildParser([
+    'start = "a"',
+    '        (',
+    '          b:"b" "c" d:"d"',
+    '          !{ return arguments.length !== 2 || b !== "b" || d !== "d"; }',
+    '        )',
+    '        "e"'
+  ].join("\n"));
+  parses(innerElementsLabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]);
 });

 test("optional expressions", function() {
--- a/test/passes-test.js
+++ b/test/passes-test.js
@ -447,12 +447,24 @@ test("computes params", function() {
  function extractExpression(node) { return node.expression; }

  var cases = [
-    /* Recursive walk */
+    /* Basics */
    {
      grammar:   'start = a:"a" { }',
      extractor: extractNode,
      params:    { a: "result0" }
    },
+    {
+      grammar:   'start = a:"a" &{ }',
+      extractor: function(node) { return node.elements[1]; },
+      params:    { a: "result0" }
+    },
+    {
+      grammar:   'start = a:"a" !{ }',
+      extractor: function(node) { return node.elements[1]; },
+      params:    { a: "result0" }
+    },
+
+    /* Recursive walk */
    {
      grammar:   'start = a:"a" { } / "b" / "c"',
      extractor: function(node) { return node.alternatives[0]; },