Semantic predicates now have access to preceding labels

Part of a fix for GH-69.
12 years ago · a2af1fe612
parent 4cf50bcf9f
commit a2af1fe612
5 changed files with 110 additions and 22 deletions
--- a/README.md
+++ b/README.md
@ -168,13 +168,13 @@ Try to match the expression and. If the match does not succeed, just return an e
 #### & { *predicate* }
-The predicate is a piece of JavaScript code that is executed as if it was inside a function. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `true` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed.
+The predicate is a piece of JavaScript code that is executed as if it was inside a function. It gets the match results of labeled expressions in the preceding expressions as its arguments. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `true` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed.
 The code inside the predicate has access to all variables and functions defined in the initializer at the beginning of the grammar. Curly braces in the predicate code must be balanced.
 #### ! { *predicate* }
-The predicate is a piece of JavaScript code that is executed as if it was inside a function. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `false` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed.
+The predicate is a piece of JavaScript code that is executed as if it was inside a function. It gets the match results of labeled expressions in the preceding expressions as its arguments. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `false` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed.
 The code inside the predicate has access to all variables and functions defined in the initializer at the beginning of the grammar. Curly braces in the predicate code must be balanced.
--- a/src/emitter.js
+++ b/src/emitter.js
@ -563,10 +563,10 @@ PEG.compiler.emitter = function(ast) {
            '}'
          ],
          semantic_and: [
-            '#{node.resultVar} = (function() {#{node.code}})() ? "" : null;'
+            '#{node.resultVar} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{values(node.params).join(", ")}) ? "" : null;'
          ],
          semantic_not: [
-            '#{node.resultVar} = (function() {#{node.code}})() ? null : "";'
+            '#{node.resultVar} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{values(node.params).join(", ")}) ? null : "";'
          ],
          optional: [
            '#block emit(node.expression)',
--- a/src/passes.js
+++ b/src/passes.js
@ -318,15 +318,16 @@ PEG.compiler.passes = {
  /*
   * This pass walks through the AST and tracks what labels are visible at each
-   * point. For "action" nodes it computes parameter names and values for the
+   * point. For "action", "semantic_and" and "semantic_not" nodes it computes
-   * function used in generated code. (In the emitter, user's code is wrapped
+   * parameter names and values for the function used in generated code. (In the
-   * into a function that is immediately executed. Its parameter names
+   * emitter, user's code is wrapped into a function that is immediately
-   * correspond to visible labels and its parameter values to their captured
+   * executed. Its parameter names correspond to visible labels and its
-   * values). Implicitly, this pass defines scoping rules for labels.
+   * parameter values to their captured values). Implicitly, this pass defines
   * scoping rules for labels.
   *
-   * After running this pass, all "action" nodes will have a |params| property
+   * After running this pass, all "action", "semantic_and" and "semantic_not"
-   * containing an object mapping parameter names to the expressions that will
+   * nodes will have a |params| property containing an object mapping parameter
-   * be used as their values.
+   * names to the expressions that will be used as their values.
   */
  computeParams: function(ast) {
    var envs = [];
@ -343,6 +344,15 @@ PEG.compiler.passes = {
      scoped(function() { compute(node.expression); });
    }
    /*
     * Attaches a |params| object to |node|: a shallow copy of the innermost
     * environment on the |envs| stack, taken at the moment of the call. Copying
     * keeps the node's params separate from the environment object itself.
     */
    function computeParams(node) {
      var currentEnv = envs[envs.length - 1],
          snapshot   = {},
          label;

      for (label in currentEnv) {
        snapshot[label] = currentEnv[label];
      }

      node.params = snapshot;
    }
    var compute = buildNodeVisitor({
      grammar:
        function(node) {
@ -389,8 +399,8 @@ PEG.compiler.passes = {
      simple_and:   computeForScopedExpression,
      simple_not:   computeForScopedExpression,
-      semantic_and: nop,
+      semantic_and: computeParams,
-      semantic_not: nop,
+      semantic_not: computeParams,
      optional:     computeForScopedExpression,
      zero_or_more: computeForScopedExpression,
      one_or_more:  computeForScopedExpression,
@ -398,14 +408,8 @@ PEG.compiler.passes = {
      action:
        function(node) {
          scoped(function() {
            var env = envs[envs.length - 1], params = {}, name;
            compute(node.expression);
-
+            computeParams(node);
            for (name in env) {
              params[name] = env[name];
            }
            node.params = params;
          });
        },
--- a/test/compiler-test.js
+++ b/test/compiler-test.js
@ -68,6 +68,42 @@ test("semantic and", function() {
  var rejectingParser = PEG.buildParser('start = "a" &{ return false; } "b"');
  doesNotParse(rejectingParser, "ab");
  var singleElementUnlabeledParser = PEG.buildParser(
    'start = "a" &{ return arguments.length === 0; }'
  );
  parses(singleElementUnlabeledParser, "a", ["a", ""]);
  var singleElementLabeledParser = PEG.buildParser(
    'start = a:"a" &{ return arguments.length === 1 && a === "a"; }'
  );
  parses(singleElementLabeledParser, "a", ["a", ""]);
  var multiElementUnlabeledParser = PEG.buildParser(
    'start = "a" "b" "c" &{ return arguments.length === 0; }'
  );
  parses(multiElementUnlabeledParser, "abc", ["a", "b", "c", ""]);
  var multiElementLabeledParser = PEG.buildParser([
    'start = a:"a" "b" c:"c"',
    '        &{ return arguments.length === 2 && a === "a" && c === "c"; }'
  ].join("\n"));
  parses(multiElementLabeledParser, "abc", ["a", "b", "c", ""]);
  var innerElementsUnlabeledParser = PEG.buildParser(
    'start = "a" ("b" "c" "d" &{ return arguments.length === 0; }) "e"'
  );
  parses(innerElementsUnlabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]);
  var innerElementsLabeledParser = PEG.buildParser([
    'start = "a"',
    '        (',
    '          b:"b" "c" d:"d"',
    '          &{ return arguments.length === 2 && b === "b" && d === "d"; }',
    '        )',
    '        "e"'
  ].join("\n"));
  parses(innerElementsLabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]);
 });
 test("semantic not", function() {
@ -76,6 +112,42 @@ test("semantic not", function() {
  var rejectingParser = PEG.buildParser('start = "a" !{ return true; } "b"');
  doesNotParse(rejectingParser, "ab");
  var singleElementUnlabeledParser = PEG.buildParser(
    'start = "a" !{ return arguments.length !== 0; }'
  );
  parses(singleElementUnlabeledParser, "a", ["a", ""]);
  var singleElementLabeledParser = PEG.buildParser(
    'start = a:"a" !{ return arguments.length !== 1 || a !== "a"; }'
  );
  parses(singleElementLabeledParser, "a", ["a", ""]);
  var multiElementUnlabeledParser = PEG.buildParser(
    'start = "a" "b" "c" !{ return arguments.length !== 0; }'
  );
  parses(multiElementUnlabeledParser, "abc", ["a", "b", "c", ""]);
  var multiElementLabeledParser = PEG.buildParser([
    'start = a:"a" "b" c:"c"',
    '        !{ return arguments.length !== 2 || a !== "a" || c !== "c"; }'
  ].join("\n"));
  parses(multiElementLabeledParser, "abc", ["a", "b", "c", ""]);
  var innerElementsUnlabeledParser = PEG.buildParser(
    'start = "a" ("b" "c" "d" !{ return arguments.length !== 0; }) "e"'
  );
  parses(innerElementsUnlabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]);
  var innerElementsLabeledParser = PEG.buildParser([
    'start = "a"',
    '        (',
    '          b:"b" "c" d:"d"',
    '          !{ return arguments.length !== 2 || b !== "b" || d !== "d"; }',
    '        )',
    '        "e"'
  ].join("\n"));
  parses(innerElementsLabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]);
 });
 test("optional expressions", function() {
--- a/test/passes-test.js
+++ b/test/passes-test.js
@ -447,12 +447,24 @@ test("computes params", function() {
  function extractExpression(node) { return node.expression; }
  var cases = [
-    /* Recursive walk */
+    /* Basics */
    {
      grammar:   'start = a:"a" { }',
      extractor: extractNode,
      params:    { a: "result0" }
    },
    {
      grammar:   'start = a:"a" &{ }',
      extractor: function(node) { return node.elements[1]; },
      params:    { a: "result0" }
    },
    {
      grammar:   'start = a:"a" !{ }',
      extractor: function(node) { return node.elements[1]; },
      params:    { a: "result0" }
    },
    /* Recursive walk */
    {
      grammar:   'start = a:"a" { } / "b" / "c"',
      extractor: function(node) { return node.alternatives[0]; },