Fix labels leaking to outer scope

Labels in expressions like "(a:"a")" or "(a:"a" b:"b" c:"c")" were visible to the outside despite being wrapped in parens. This commit makes them invisible, as they should be. Note this required introduction of a new "group" AST node, whose purpose is purely to provide label scope isolation. This was necessary because "label" and "sequence" nodes don't (and can't!) provide this isolation themselves. Part of a fix of #396.
8 years ago · 0c39f1cf86
parent 58806a3d77
commit 0c39f1cf86
10 changed files with 82 additions and 44 deletions
--- a/lib/compiler/asts.js
+++ b/lib/compiler/asts.js
@ -42,6 +42,7 @@ var asts = {
      optional:     consumesFalse,
      zero_or_more: consumesFalse,
      one_or_more:  consumesExpression,
      group:        consumesExpression,
      semantic_and: consumesFalse,
      semantic_not: consumesFalse,
--- a/lib/compiler/passes/generate-bytecode.js
+++ b/lib/compiler/passes/generate-bytecode.js
@ -510,6 +510,14 @@ function generateBytecode(ast) {
      );
    },
    group: function(node, context) {
      return generate(node.expression, {
        sp:     context.sp,
        env:    objects.clone(context.env),
        action: null
      });
    },
    semantic_and: function(node, context) {
      return buildSemanticPredicate(node.code, false, context);
    },
--- a/lib/compiler/visitor.js
+++ b/lib/compiler/visitor.js
@ -54,6 +54,7 @@ var visitor = {
          optional:     visitExpression,
          zero_or_more: visitExpression,
          one_or_more:  visitExpression,
          group:        visitExpression,
          semantic_and: visitNop,
          semantic_not: visitNop,
          rule_ref:     visitNop,
--- a/lib/parser.js
+++ b/lib/parser.js
@ -135,7 +135,17 @@ module.exports = (function() {
        peg$c28 = { type: "literal", value: "(", description: "\"(\"" },
        peg$c29 = ")",
        peg$c30 = { type: "literal", value: ")", description: "\")\"" },
-        peg$c31 = function(expression) { return expression; },
+        peg$c31 = function(expression) {
              /*
               * The purpose of the "group" AST node is just to isolate label scope. We
               * don't need to put it around nodes that can't contain any labels or
               * nodes that already isolate label scope themselves. This leaves us with
               * "labeled" and "sequence".
               */
              return expression.type === 'labeled' || expression.type === 'sequence'
                  ? { type: "group", expression: expression }
                  : expression;
            },
        peg$c32 = function(name) {
              return { type: "rule_ref", name: name, location: location() };
            },
--- a/spec/behavior/generated-parser-behavior.spec.js
+++ b/spec/behavior/generated-parser-behavior.spec.js
@ -466,12 +466,10 @@ describe("generated parser behavior", function() {
          it("cannot access variables defined by subexpressions", function() {
            var testcases = [
-                  // TODO: Fix #396.
+                  {
-                  //
+                    grammar: 'start = (a:"a") &{ return a === "a"; }',
-                  // {
+                    input:   "a"
-                  //   grammar: 'start = (a:"a") &{ return a === "a"; }',
+                  },
                  //   input:   "a"
                  // },
                  {
                    grammar: 'start = (a:"a")? &{ return a === "a"; }',
                    input:   "a"
@ -500,12 +498,10 @@ describe("generated parser behavior", function() {
                    grammar: 'start = b:(a:"a") &{ return a === "a"; }',
                    input:   "a"
                  },
-                  // TODO: Fix #396.
+                  {
-                  //
+                    grammar: 'start = ("a" b:"b" "c") &{ return b === "b"; }',
-                  // {
+                    input:   "abc"
-                  //   grammar: 'start = ("a" b:"b" "c") &{ return b === "b"; }',
+                  },
                  //   input:   "abc"
                  // },
                  {
                    grammar: 'start = (a:"a" { return a; }) &{ return a === "a"; }',
                    input:   "a"
@ -670,12 +666,10 @@ describe("generated parser behavior", function() {
          it("cannot access variables defined by subexpressions", function() {
            var testcases = [
-                  // TODO: Fix #396.
+                  {
-                  //
+                    grammar: 'start = (a:"a") !{ return a !== "a"; }',
-                  // {
+                    input:   "a"
-                  //   grammar: 'start = (a:"a") !{ return a !== "a"; }',
+                  },
                  //   input:   "a"
                  // },
                  {
                    grammar: 'start = (a:"a")? !{ return a !== "a"; }',
                    input:   "a"
@ -704,12 +698,10 @@ describe("generated parser behavior", function() {
                    grammar: 'start = b:(a:"a") !{ return a !== "a"; }',
                    input:   "a"
                  },
-                  // TODO: Fix #396.
+                  {
-                  //
+                    grammar: 'start = ("a" b:"b" "c") !{ return b !== "b"; }',
-                  // {
+                    input:   "abc"
-                  //   grammar: 'start = ("a" b:"b" "c") !{ return b !== "b"; }',
+                  },
                  //   input:   "abc"
                  // },
                  {
                    grammar: 'start = (a:"a" { return a; }) !{ return a !== "a"; }',
                    input:   "a"
@ -1052,12 +1044,10 @@ describe("generated parser behavior", function() {
            it("cannot access variables defined by subexpressions", function() {
              var testcases = [
-                    // TODO: Fix #396.
+                    {
-                    //
+                      grammar: 'start = (a:"a") { return a; }',
-                    // {
+                      input:   "a"
-                    //   grammar: 'start = (a:"a") { return a; }',
+                    },
                    //   input:   "a"
                    // },
                    {
                      grammar: 'start = (a:"a")? { return a; }',
                      input:   "a"
@ -1086,12 +1076,10 @@ describe("generated parser behavior", function() {
                      grammar: 'start = b:(a:"a") { return a; }',
                      input:   "a"
                    },
-                    // TODO: Fix #396.
+                    {
-                    //
+                      grammar: 'start = ("a" b:"b" "c") { return b; }',
-                    // {
+                      input:   "abc"
-                    //   grammar: 'start = ("a" b:"b" "c") { return b; }',
+                    },
                    //   input:   "abc"
                    // },
                    {
                      grammar: 'start = (a:"a" { return a; }) { return a; }',
                      input:   "a"
--- a/spec/unit/compiler/passes/generate-bytecode.spec.js
+++ b/spec/unit/compiler/passes/generate-bytecode.spec.js
@ -357,6 +357,14 @@ describe("compiler pass |generateBytecode|", function() {
    });
  });
  describe("for group", function() {
    it("generates correct bytecode", function() {
      expect(pass).toChangeAST('start = ("a")', bytecodeDetails([
        18, 0, 2, 2, 22, 0, 23, 1   // <expression>
      ]));
    });
  });
  describe("for semantic_and", function() {
    describe("without labels", function() {
      var grammar = 'start = &{ code }';
--- a/spec/unit/compiler/passes/report-infinite-loops.spec.js
+++ b/spec/unit/compiler/passes/report-infinite-loops.spec.js
@ -69,6 +69,9 @@ describe("compiler pass |reportInfiniteLoops|", function() {
    expect(pass).toReportError('start = (""+)*');
    expect(pass).not.toReportError('start = ("a"+)*');
    expect(pass).toReportError('start = ("")*');
    expect(pass).not.toReportError('start = ("a")*');
    expect(pass).toReportError('start = (&{ })*');
    expect(pass).toReportError('start = (!{ })*');
--- a/spec/unit/compiler/passes/report-left-recursion.spec.js
+++ b/spec/unit/compiler/passes/report-left-recursion.spec.js
@ -88,6 +88,9 @@ describe("compiler pass |reportLeftRecursion|", function() {
      expect(pass).toReportError('start = ""+ start');
      expect(pass).not.toReportError('start = "a"+ start');
      expect(pass).toReportError('start = ("") start');
      expect(pass).not.toReportError('start = ("a") start');
      expect(pass).toReportError('start = &{ } start');
      expect(pass).toReportError('start = !{ } start');
--- a/spec/unit/parser.spec.js
+++ b/spec/unit/parser.spec.js
@ -33,6 +33,8 @@ describe("PEG.js grammar parser", function() {
        type:     "sequence",
        elements: [labeledAbcd, labeledEfgh, labeledIjkl, labeledMnop]
      },
      groupLabeled      = { type: "group",  expression: labeledAbcd },
      groupSequence     = { type: "group",  expression: sequence    },
      actionAbcd        = { type: "action", expression: literalAbcd, code: " code " },
      actionEfgh        = { type: "action", expression: literalEfgh, code: " code " },
      actionIjkl        = { type: "action", expression: literalIjkl, code: " code " },
@ -162,6 +164,7 @@ describe("PEG.js grammar parser", function() {
      optional:     stripExpression,
      zero_or_more: stripExpression,
      one_or_more:  stripExpression,
      group:        stripExpression,
      semantic_and: stripLeaf,
      semantic_not: stripLeaf,
      rule_ref:     stripLeaf,
@ -365,12 +368,15 @@ describe("PEG.js grammar parser", function() {
  /* Canonical PrimaryExpression is "\"abcd\"". */
  it("parses PrimaryExpression", function() {
-    expect('start = "abcd"'      ).toParseAs(trivialGrammar);
+    expect('start = "abcd"'   ).toParseAs(trivialGrammar);
-    expect('start = [a-d]'       ).toParseAs(classGrammar([["a", "d"]], false, false, '[a-d]'));
+    expect('start = [a-d]'    ).toParseAs(classGrammar([["a", "d"]], false, false, '[a-d]'));
-    expect('start = .'           ).toParseAs(anyGrammar());
+    expect('start = .'        ).toParseAs(anyGrammar());
-    expect('start = a'           ).toParseAs(ruleRefGrammar("a"));
+    expect('start = a'        ).toParseAs(ruleRefGrammar("a"));
-    expect('start = &{ code }'   ).toParseAs(oneRuleGrammar(semanticAnd));
+    expect('start = &{ code }').toParseAs(oneRuleGrammar(semanticAnd));
-    expect('start = (\n"abcd"\n)').toParseAs(trivialGrammar);
+
    expect('start = (\na:"abcd"\n)'            ).toParseAs(oneRuleGrammar(groupLabeled));
    expect('start = (\n"abcd" "efgh" "ijkl"\n)').toParseAs(oneRuleGrammar(groupSequence));
    expect('start = (\n"abcd"\n)'              ).toParseAs(trivialGrammar);
  });
  /* Canonical RuleReferenceExpression is "a". */
--- a/src/parser.pegjs
+++ b/src/parser.pegjs
@ -194,7 +194,17 @@ PrimaryExpression
  / AnyMatcher
  / RuleReferenceExpression
  / SemanticPredicateExpression
-  / "(" __ expression:Expression __ ")" { return expression; }
+  / "(" __ expression:Expression __ ")" {
      /*
       * The purpose of the "group" AST node is just to isolate label scope. We
       * don't need to put it around nodes that can't contain any labels or
       * nodes that already isolate label scope themselves. This leaves us with
       * "labeled" and "sequence".
       */
      return expression.type === 'labeled' || expression.type === 'sequence'
          ? { type: "group", expression: expression }
          : expression;
    }
 RuleReferenceExpression
  = name:IdentifierName !(__ (StringLiteral __)? "=") {