Fix labels leaking to outer scope

Labels in expressions like "(a:"a")" or "(a:"a" b:"b" c:"c")" were
visible to the outside despite being wrapped in parens. This commit
makes them invisible, as they should be.

Note this required introduction of a new "group" AST node, whose purpose
is purely to provide label scope isolation. This was necessary because
"label" and "sequence" nodes don't (and can't!) provide this isolation
themselves.

Part of a fix of #396.
redux
David Majda 8 years ago
parent 58806a3d77
commit 0c39f1cf86

@ -42,6 +42,7 @@ var asts = {
optional: consumesFalse, optional: consumesFalse,
zero_or_more: consumesFalse, zero_or_more: consumesFalse,
one_or_more: consumesExpression, one_or_more: consumesExpression,
group: consumesExpression,
semantic_and: consumesFalse, semantic_and: consumesFalse,
semantic_not: consumesFalse, semantic_not: consumesFalse,

@ -510,6 +510,14 @@ function generateBytecode(ast) {
); );
}, },
group: function(node, context) {
return generate(node.expression, {
sp: context.sp,
env: objects.clone(context.env),
action: null
});
},
semantic_and: function(node, context) { semantic_and: function(node, context) {
return buildSemanticPredicate(node.code, false, context); return buildSemanticPredicate(node.code, false, context);
}, },

@ -54,6 +54,7 @@ var visitor = {
optional: visitExpression, optional: visitExpression,
zero_or_more: visitExpression, zero_or_more: visitExpression,
one_or_more: visitExpression, one_or_more: visitExpression,
group: visitExpression,
semantic_and: visitNop, semantic_and: visitNop,
semantic_not: visitNop, semantic_not: visitNop,
rule_ref: visitNop, rule_ref: visitNop,

@ -135,7 +135,17 @@ module.exports = (function() {
peg$c28 = { type: "literal", value: "(", description: "\"(\"" }, peg$c28 = { type: "literal", value: "(", description: "\"(\"" },
peg$c29 = ")", peg$c29 = ")",
peg$c30 = { type: "literal", value: ")", description: "\")\"" }, peg$c30 = { type: "literal", value: ")", description: "\")\"" },
peg$c31 = function(expression) { return expression; }, peg$c31 = function(expression) {
/*
* The purpose of the "group" AST node is just to isolate label scope. We
* don't need to put it around nodes that can't contain any labels or
* nodes that already isolate label scope themselves. This leaves us with
* "labeled" and "sequence".
*/
return expression.type === 'labeled' || expression.type === 'sequence'
? { type: "group", expression: expression }
: expression;
},
peg$c32 = function(name) { peg$c32 = function(name) {
return { type: "rule_ref", name: name, location: location() }; return { type: "rule_ref", name: name, location: location() };
}, },

@ -466,12 +466,10 @@ describe("generated parser behavior", function() {
it("cannot access variables defined by subexpressions", function() { it("cannot access variables defined by subexpressions", function() {
var testcases = [ var testcases = [
// TODO: Fix #396. {
// grammar: 'start = (a:"a") &{ return a === "a"; }',
// { input: "a"
// grammar: 'start = (a:"a") &{ return a === "a"; }', },
// input: "a"
// },
{ {
grammar: 'start = (a:"a")? &{ return a === "a"; }', grammar: 'start = (a:"a")? &{ return a === "a"; }',
input: "a" input: "a"
@ -500,12 +498,10 @@ describe("generated parser behavior", function() {
grammar: 'start = b:(a:"a") &{ return a === "a"; }', grammar: 'start = b:(a:"a") &{ return a === "a"; }',
input: "a" input: "a"
}, },
// TODO: Fix #396. {
// grammar: 'start = ("a" b:"b" "c") &{ return b === "b"; }',
// { input: "abc"
// grammar: 'start = ("a" b:"b" "c") &{ return b === "b"; }', },
// input: "abc"
// },
{ {
grammar: 'start = (a:"a" { return a; }) &{ return a === "a"; }', grammar: 'start = (a:"a" { return a; }) &{ return a === "a"; }',
input: "a" input: "a"
@ -670,12 +666,10 @@ describe("generated parser behavior", function() {
it("cannot access variables defined by subexpressions", function() { it("cannot access variables defined by subexpressions", function() {
var testcases = [ var testcases = [
// TODO: Fix #396. {
// grammar: 'start = (a:"a") !{ return a !== "a"; }',
// { input: "a"
// grammar: 'start = (a:"a") !{ return a !== "a"; }', },
// input: "a"
// },
{ {
grammar: 'start = (a:"a")? !{ return a !== "a"; }', grammar: 'start = (a:"a")? !{ return a !== "a"; }',
input: "a" input: "a"
@ -704,12 +698,10 @@ describe("generated parser behavior", function() {
grammar: 'start = b:(a:"a") !{ return a !== "a"; }', grammar: 'start = b:(a:"a") !{ return a !== "a"; }',
input: "a" input: "a"
}, },
// TODO: Fix #396. {
// grammar: 'start = ("a" b:"b" "c") !{ return b !== "b"; }',
// { input: "abc"
// grammar: 'start = ("a" b:"b" "c") !{ return b !== "b"; }', },
// input: "abc"
// },
{ {
grammar: 'start = (a:"a" { return a; }) !{ return a !== "a"; }', grammar: 'start = (a:"a" { return a; }) !{ return a !== "a"; }',
input: "a" input: "a"
@ -1052,12 +1044,10 @@ describe("generated parser behavior", function() {
it("cannot access variables defined by subexpressions", function() { it("cannot access variables defined by subexpressions", function() {
var testcases = [ var testcases = [
// TODO: Fix #396. {
// grammar: 'start = (a:"a") { return a; }',
// { input: "a"
// grammar: 'start = (a:"a") { return a; }', },
// input: "a"
// },
{ {
grammar: 'start = (a:"a")? { return a; }', grammar: 'start = (a:"a")? { return a; }',
input: "a" input: "a"
@ -1086,12 +1076,10 @@ describe("generated parser behavior", function() {
grammar: 'start = b:(a:"a") { return a; }', grammar: 'start = b:(a:"a") { return a; }',
input: "a" input: "a"
}, },
// TODO: Fix #396. {
// grammar: 'start = ("a" b:"b" "c") { return b; }',
// { input: "abc"
// grammar: 'start = ("a" b:"b" "c") { return b; }', },
// input: "abc"
// },
{ {
grammar: 'start = (a:"a" { return a; }) { return a; }', grammar: 'start = (a:"a" { return a; }) { return a; }',
input: "a" input: "a"

@ -357,6 +357,14 @@ describe("compiler pass |generateBytecode|", function() {
}); });
}); });
describe("for group", function() {
it("generates correct bytecode", function() {
expect(pass).toChangeAST('start = ("a")', bytecodeDetails([
18, 0, 2, 2, 22, 0, 23, 1 // <expression>
]));
});
});
describe("for semantic_and", function() { describe("for semantic_and", function() {
describe("without labels", function() { describe("without labels", function() {
var grammar = 'start = &{ code }'; var grammar = 'start = &{ code }';

@ -69,6 +69,9 @@ describe("compiler pass |reportInfiniteLoops|", function() {
expect(pass).toReportError('start = (""+)*'); expect(pass).toReportError('start = (""+)*');
expect(pass).not.toReportError('start = ("a"+)*'); expect(pass).not.toReportError('start = ("a"+)*');
expect(pass).toReportError('start = ("")*');
expect(pass).not.toReportError('start = ("a")*');
expect(pass).toReportError('start = (&{ })*'); expect(pass).toReportError('start = (&{ })*');
expect(pass).toReportError('start = (!{ })*'); expect(pass).toReportError('start = (!{ })*');

@ -88,6 +88,9 @@ describe("compiler pass |reportLeftRecursion|", function() {
expect(pass).toReportError('start = ""+ start'); expect(pass).toReportError('start = ""+ start');
expect(pass).not.toReportError('start = "a"+ start'); expect(pass).not.toReportError('start = "a"+ start');
expect(pass).toReportError('start = ("") start');
expect(pass).not.toReportError('start = ("a") start');
expect(pass).toReportError('start = &{ } start'); expect(pass).toReportError('start = &{ } start');
expect(pass).toReportError('start = !{ } start'); expect(pass).toReportError('start = !{ } start');

@ -33,6 +33,8 @@ describe("PEG.js grammar parser", function() {
type: "sequence", type: "sequence",
elements: [labeledAbcd, labeledEfgh, labeledIjkl, labeledMnop] elements: [labeledAbcd, labeledEfgh, labeledIjkl, labeledMnop]
}, },
groupLabeled = { type: "group", expression: labeledAbcd },
groupSequence = { type: "group", expression: sequence },
actionAbcd = { type: "action", expression: literalAbcd, code: " code " }, actionAbcd = { type: "action", expression: literalAbcd, code: " code " },
actionEfgh = { type: "action", expression: literalEfgh, code: " code " }, actionEfgh = { type: "action", expression: literalEfgh, code: " code " },
actionIjkl = { type: "action", expression: literalIjkl, code: " code " }, actionIjkl = { type: "action", expression: literalIjkl, code: " code " },
@ -162,6 +164,7 @@ describe("PEG.js grammar parser", function() {
optional: stripExpression, optional: stripExpression,
zero_or_more: stripExpression, zero_or_more: stripExpression,
one_or_more: stripExpression, one_or_more: stripExpression,
group: stripExpression,
semantic_and: stripLeaf, semantic_and: stripLeaf,
semantic_not: stripLeaf, semantic_not: stripLeaf,
rule_ref: stripLeaf, rule_ref: stripLeaf,
@ -365,12 +368,15 @@ describe("PEG.js grammar parser", function() {
/* Canonical PrimaryExpression is "\"abcd\"". */ /* Canonical PrimaryExpression is "\"abcd\"". */
it("parses PrimaryExpression", function() { it("parses PrimaryExpression", function() {
expect('start = "abcd"' ).toParseAs(trivialGrammar); expect('start = "abcd"' ).toParseAs(trivialGrammar);
expect('start = [a-d]' ).toParseAs(classGrammar([["a", "d"]], false, false, '[a-d]')); expect('start = [a-d]' ).toParseAs(classGrammar([["a", "d"]], false, false, '[a-d]'));
expect('start = .' ).toParseAs(anyGrammar()); expect('start = .' ).toParseAs(anyGrammar());
expect('start = a' ).toParseAs(ruleRefGrammar("a")); expect('start = a' ).toParseAs(ruleRefGrammar("a"));
expect('start = &{ code }' ).toParseAs(oneRuleGrammar(semanticAnd)); expect('start = &{ code }').toParseAs(oneRuleGrammar(semanticAnd));
expect('start = (\n"abcd"\n)').toParseAs(trivialGrammar);
expect('start = (\na:"abcd"\n)' ).toParseAs(oneRuleGrammar(groupLabeled));
expect('start = (\n"abcd" "efgh" "ijkl"\n)').toParseAs(oneRuleGrammar(groupSequence));
expect('start = (\n"abcd"\n)' ).toParseAs(trivialGrammar);
}); });
/* Canonical RuleReferenceExpression is "a". */ /* Canonical RuleReferenceExpression is "a". */

@ -194,7 +194,17 @@ PrimaryExpression
/ AnyMatcher / AnyMatcher
/ RuleReferenceExpression / RuleReferenceExpression
/ SemanticPredicateExpression / SemanticPredicateExpression
/ "(" __ expression:Expression __ ")" { return expression; } / "(" __ expression:Expression __ ")" {
/*
* The purpose of the "group" AST node is just to isolate label scope. We
* don't need to put it around nodes that can't contain any labels or
* nodes that already isolate label scope themselves. This leaves us with
* "labeled" and "sequence".
*/
return expression.type === 'labeled' || expression.type === 'sequence'
? { type: "group", expression: expression }
: expression;
}
RuleReferenceExpression RuleReferenceExpression
= name:IdentifierName !(__ (StringLiteral __)? "=") { = name:IdentifierName !(__ (StringLiteral __)? "=") {

Loading…
Cancel
Save