From 0c39f1cf86d73c85a1f832e6cfddf4176f9e58bc Mon Sep 17 00:00:00 2001 From: David Majda Date: Fri, 11 Mar 2016 16:18:29 +0100 Subject: [PATCH] Fix labels leaking to outer scope Labels in expressions like "(a:"a")" or "(a:"a" b:"b" c:"c")" were visible to the outside despite being wrapped in parens. This commit makes them invisible, as they should be. Note this required introduction of a new "group" AST node, whose purpose is purely to provide label scope isolation. This was necessary because "label" and "sequence" nodes don't (and can't!) provide this isolation themselves. Part of a fix of #396. --- lib/compiler/asts.js | 1 + lib/compiler/passes/generate-bytecode.js | 8 +++ lib/compiler/visitor.js | 1 + lib/parser.js | 12 +++- .../generated-parser-behavior.spec.js | 60 ++++++++----------- .../compiler/passes/generate-bytecode.spec.js | 8 +++ .../passes/report-infinite-loops.spec.js | 3 + .../passes/report-left-recursion.spec.js | 3 + spec/unit/parser.spec.js | 18 ++++-- src/parser.pegjs | 12 +++- 10 files changed, 82 insertions(+), 44 deletions(-) diff --git a/lib/compiler/asts.js b/lib/compiler/asts.js index fbca4fb..7dac649 100644 --- a/lib/compiler/asts.js +++ b/lib/compiler/asts.js @@ -42,6 +42,7 @@ var asts = { optional: consumesFalse, zero_or_more: consumesFalse, one_or_more: consumesExpression, + group: consumesExpression, semantic_and: consumesFalse, semantic_not: consumesFalse, diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js index 1249e80..c0fdeba 100644 --- a/lib/compiler/passes/generate-bytecode.js +++ b/lib/compiler/passes/generate-bytecode.js @@ -510,6 +510,14 @@ function generateBytecode(ast) { ); }, + group: function(node, context) { + return generate(node.expression, { + sp: context.sp, + env: objects.clone(context.env), + action: null + }); + }, + semantic_and: function(node, context) { return buildSemanticPredicate(node.code, false, context); }, diff --git a/lib/compiler/visitor.js b/lib/compiler/visitor.js index 70e6f12..0a447bc 100644 --- a/lib/compiler/visitor.js +++ b/lib/compiler/visitor.js @@ -54,6 +54,7 @@ var visitor = { optional: visitExpression, zero_or_more: visitExpression, one_or_more: visitExpression, + group: visitExpression, semantic_and: visitNop, semantic_not: visitNop, rule_ref: visitNop, diff --git a/lib/parser.js b/lib/parser.js index 8ca58c4..56c812a 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -135,7 +135,17 @@ module.exports = (function() { peg$c28 = { type: "literal", value: "(", description: "\"(\"" }, peg$c29 = ")", peg$c30 = { type: "literal", value: ")", description: "\")\"" }, - peg$c31 = function(expression) { return expression; }, + peg$c31 = function(expression) { + /* + * The purpose of the "group" AST node is just to isolate label scope. We + * don't need to put it around nodes that can't contain any labels or + * nodes that already isolate label scope themselves. This leaves us with + * "labeled" and "sequence". + */ + return expression.type === 'labeled' || expression.type === 'sequence' + ? { type: "group", expression: expression } + : expression; + }, peg$c32 = function(name) { return { type: "rule_ref", name: name, location: location() }; }, diff --git a/spec/behavior/generated-parser-behavior.spec.js b/spec/behavior/generated-parser-behavior.spec.js index bb73189..41937f7 100644 --- a/spec/behavior/generated-parser-behavior.spec.js +++ b/spec/behavior/generated-parser-behavior.spec.js @@ -466,12 +466,10 @@ describe("generated parser behavior", function() { it("cannot access variables defined by subexpressions", function() { var testcases = [ - // TODO: Fix #396. - // - // { - // grammar: 'start = (a:"a") &{ return a === "a"; }', - // input: "a" - // }, + { + grammar: 'start = (a:"a") &{ return a === "a"; }', + input: "a" + }, { grammar: 'start = (a:"a")? &{ return a === "a"; }', input: "a" @@ -500,12 +498,10 @@ describe("generated parser behavior", function() { grammar: 'start = b:(a:"a") &{ return a === "a"; }', input: "a" }, - // TODO: Fix #396. - // - // { - // grammar: 'start = ("a" b:"b" "c") &{ return b === "b"; }', - // input: "abc" - // }, + { + grammar: 'start = ("a" b:"b" "c") &{ return b === "b"; }', + input: "abc" + }, { grammar: 'start = (a:"a" { return a; }) &{ return a === "a"; }', input: "a" @@ -670,12 +666,10 @@ describe("generated parser behavior", function() { it("cannot access variables defined by subexpressions", function() { var testcases = [ - // TODO: Fix #396. - // - // { - // grammar: 'start = (a:"a") !{ return a !== "a"; }', - // input: "a" - // }, + { + grammar: 'start = (a:"a") !{ return a !== "a"; }', + input: "a" + }, { grammar: 'start = (a:"a")? !{ return a !== "a"; }', input: "a" @@ -704,12 +698,10 @@ describe("generated parser behavior", function() { grammar: 'start = b:(a:"a") !{ return a !== "a"; }', input: "a" }, - // TODO: Fix #396. - // - // { - // grammar: 'start = ("a" b:"b" "c") !{ return b !== "b"; }', - // input: "abc" - // }, + { + grammar: 'start = ("a" b:"b" "c") !{ return b !== "b"; }', + input: "abc" + }, { grammar: 'start = (a:"a" { return a; }) !{ return a !== "a"; }', input: "a" @@ -1052,12 +1044,10 @@ describe("generated parser behavior", function() { it("cannot access variables defined by subexpressions", function() { var testcases = [ - // TODO: Fix #396. - // - // { - // grammar: 'start = (a:"a") { return a; }', - // input: "a" - // }, + { + grammar: 'start = (a:"a") { return a; }', + input: "a" + }, { grammar: 'start = (a:"a")? { return a; }', input: "a" @@ -1086,12 +1076,10 @@ describe("generated parser behavior", function() { grammar: 'start = b:(a:"a") { return a; }', input: "a" }, - // TODO: Fix #396. - // - // { - // grammar: 'start = ("a" b:"b" "c") { return b; }', - // input: "abc" - // }, + { + grammar: 'start = ("a" b:"b" "c") { return b; }', + input: "abc" + }, { grammar: 'start = (a:"a" { return a; }) { return a; }', input: "a" diff --git a/spec/unit/compiler/passes/generate-bytecode.spec.js b/spec/unit/compiler/passes/generate-bytecode.spec.js index de293af..1c5ac15 100644 --- a/spec/unit/compiler/passes/generate-bytecode.spec.js +++ b/spec/unit/compiler/passes/generate-bytecode.spec.js @@ -357,6 +357,14 @@ describe("compiler pass |generateBytecode|", function() { }); }); + describe("for group", function() { + it("generates correct bytecode", function() { + expect(pass).toChangeAST('start = ("a")', bytecodeDetails([ + 18, 0, 2, 2, 22, 0, 23, 1 // + ])); + }); + }); + describe("for semantic_and", function() { describe("without labels", function() { var grammar = 'start = &{ code }'; diff --git a/spec/unit/compiler/passes/report-infinite-loops.spec.js b/spec/unit/compiler/passes/report-infinite-loops.spec.js index 1758b93..c700335 100644 --- a/spec/unit/compiler/passes/report-infinite-loops.spec.js +++ b/spec/unit/compiler/passes/report-infinite-loops.spec.js @@ -69,6 +69,9 @@ describe("compiler pass |reportInfiniteLoops|", function() { expect(pass).toReportError('start = (""+)*'); expect(pass).not.toReportError('start = ("a"+)*'); + expect(pass).toReportError('start = ("")*'); + expect(pass).not.toReportError('start = ("a")*'); + expect(pass).toReportError('start = (&{ })*'); expect(pass).toReportError('start = (!{ })*'); diff --git a/spec/unit/compiler/passes/report-left-recursion.spec.js b/spec/unit/compiler/passes/report-left-recursion.spec.js index 8fd168e..7ad62f0 100644 --- a/spec/unit/compiler/passes/report-left-recursion.spec.js +++ b/spec/unit/compiler/passes/report-left-recursion.spec.js @@ -88,6 +88,9 @@ describe("compiler pass |reportLeftRecursion|", function() { expect(pass).toReportError('start = ""+ start'); expect(pass).not.toReportError('start = "a"+ start'); + expect(pass).toReportError('start = ("") start'); + expect(pass).not.toReportError('start = ("a") start'); + expect(pass).toReportError('start = &{ } start'); expect(pass).toReportError('start = !{ } start'); diff --git a/spec/unit/parser.spec.js b/spec/unit/parser.spec.js index cc1d87a..4df7995 100644 --- a/spec/unit/parser.spec.js +++ b/spec/unit/parser.spec.js @@ -33,6 +33,8 @@ describe("PEG.js grammar parser", function() { type: "sequence", elements: [labeledAbcd, labeledEfgh, labeledIjkl, labeledMnop] }, + groupLabeled = { type: "group", expression: labeledAbcd }, + groupSequence = { type: "group", expression: sequence }, actionAbcd = { type: "action", expression: literalAbcd, code: " code " }, actionEfgh = { type: "action", expression: literalEfgh, code: " code " }, actionIjkl = { type: "action", expression: literalIjkl, code: " code " }, @@ -162,6 +164,7 @@ describe("PEG.js grammar parser", function() { optional: stripExpression, zero_or_more: stripExpression, one_or_more: stripExpression, + group: stripExpression, semantic_and: stripLeaf, semantic_not: stripLeaf, rule_ref: stripLeaf, @@ -365,12 +368,15 @@ describe("PEG.js grammar parser", function() { /* Canonical PrimaryExpression is "\"abcd\"". */ it("parses PrimaryExpression", function() { - expect('start = "abcd"' ).toParseAs(trivialGrammar); - expect('start = [a-d]' ).toParseAs(classGrammar([["a", "d"]], false, false, '[a-d]')); - expect('start = .' ).toParseAs(anyGrammar()); - expect('start = a' ).toParseAs(ruleRefGrammar("a")); - expect('start = &{ code }' ).toParseAs(oneRuleGrammar(semanticAnd)); - expect('start = (\n"abcd"\n)').toParseAs(trivialGrammar); + expect('start = "abcd"' ).toParseAs(trivialGrammar); + expect('start = [a-d]' ).toParseAs(classGrammar([["a", "d"]], false, false, '[a-d]')); + expect('start = .' ).toParseAs(anyGrammar()); + expect('start = a' ).toParseAs(ruleRefGrammar("a")); + expect('start = &{ code }').toParseAs(oneRuleGrammar(semanticAnd)); + + expect('start = (\na:"abcd"\n)' ).toParseAs(oneRuleGrammar(groupLabeled)); + expect('start = (\n"abcd" "efgh" "ijkl"\n)').toParseAs(oneRuleGrammar(groupSequence)); + expect('start = (\n"abcd"\n)' ).toParseAs(trivialGrammar); }); /* Canonical RuleReferenceExpression is "a". */ diff --git a/src/parser.pegjs b/src/parser.pegjs index 13f33cf..87962ef 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -194,7 +194,17 @@ PrimaryExpression / AnyMatcher / RuleReferenceExpression / SemanticPredicateExpression - / "(" __ expression:Expression __ ")" { return expression; } + / "(" __ expression:Expression __ ")" { + /* + * The purpose of the "group" AST node is just to isolate label scope. We + * don't need to put it around nodes that can't contain any labels or + * nodes that already isolate label scope themselves. This leaves us with + * "labeled" and "sequence". + */ + return expression.type === 'labeled' || expression.type === 'sequence' + ? { type: "group", expression: expression } + : expression; + } RuleReferenceExpression = name:IdentifierName !(__ (StringLiteral __)? "=") {