Fix labels leaking to outer scope

Labels in expressions like "(a:"a")" or "(a:"a" b:"b" c:"c")" were
visible to the outside despite being wrapped in parens. This commit
makes them invisible, as they should be.

Note this required introduction of a new "group" AST node, whose purpose
is purely to provide label scope isolation. This was necessary because
"label" and "sequence" nodes don't (and can't!) provide this isolation
themselves.

Part of a fix of #396.
redux
David Majda 9 years ago
parent 58806a3d77
commit 0c39f1cf86

@ -42,6 +42,7 @@ var asts = {
optional: consumesFalse,
zero_or_more: consumesFalse,
one_or_more: consumesExpression,
group: consumesExpression,
semantic_and: consumesFalse,
semantic_not: consumesFalse,

@ -510,6 +510,14 @@ function generateBytecode(ast) {
);
},
group: function(node, context) {
return generate(node.expression, {
sp: context.sp,
env: objects.clone(context.env),
action: null
});
},
semantic_and: function(node, context) {
return buildSemanticPredicate(node.code, false, context);
},

@ -54,6 +54,7 @@ var visitor = {
optional: visitExpression,
zero_or_more: visitExpression,
one_or_more: visitExpression,
group: visitExpression,
semantic_and: visitNop,
semantic_not: visitNop,
rule_ref: visitNop,

@ -135,7 +135,17 @@ module.exports = (function() {
peg$c28 = { type: "literal", value: "(", description: "\"(\"" },
peg$c29 = ")",
peg$c30 = { type: "literal", value: ")", description: "\")\"" },
peg$c31 = function(expression) { return expression; },
peg$c31 = function(expression) {
/*
* The purpose of the "group" AST node is just to isolate label scope. We
* don't need to put it around nodes that can't contain any labels or
* nodes that already isolate label scope themselves. This leaves us with
* "labeled" and "sequence".
*/
return expression.type === 'labeled' || expression.type === 'sequence'
? { type: "group", expression: expression }
: expression;
},
peg$c32 = function(name) {
return { type: "rule_ref", name: name, location: location() };
},

@ -466,12 +466,10 @@ describe("generated parser behavior", function() {
it("cannot access variables defined by subexpressions", function() {
var testcases = [
// TODO: Fix #396.
//
// {
// grammar: 'start = (a:"a") &{ return a === "a"; }',
// input: "a"
// },
{
grammar: 'start = (a:"a") &{ return a === "a"; }',
input: "a"
},
{
grammar: 'start = (a:"a")? &{ return a === "a"; }',
input: "a"
@ -500,12 +498,10 @@ describe("generated parser behavior", function() {
grammar: 'start = b:(a:"a") &{ return a === "a"; }',
input: "a"
},
// TODO: Fix #396.
//
// {
// grammar: 'start = ("a" b:"b" "c") &{ return b === "b"; }',
// input: "abc"
// },
{
grammar: 'start = ("a" b:"b" "c") &{ return b === "b"; }',
input: "abc"
},
{
grammar: 'start = (a:"a" { return a; }) &{ return a === "a"; }',
input: "a"
@ -670,12 +666,10 @@ describe("generated parser behavior", function() {
it("cannot access variables defined by subexpressions", function() {
var testcases = [
// TODO: Fix #396.
//
// {
// grammar: 'start = (a:"a") !{ return a !== "a"; }',
// input: "a"
// },
{
grammar: 'start = (a:"a") !{ return a !== "a"; }',
input: "a"
},
{
grammar: 'start = (a:"a")? !{ return a !== "a"; }',
input: "a"
@ -704,12 +698,10 @@ describe("generated parser behavior", function() {
grammar: 'start = b:(a:"a") !{ return a !== "a"; }',
input: "a"
},
// TODO: Fix #396.
//
// {
// grammar: 'start = ("a" b:"b" "c") !{ return b !== "b"; }',
// input: "abc"
// },
{
grammar: 'start = ("a" b:"b" "c") !{ return b !== "b"; }',
input: "abc"
},
{
grammar: 'start = (a:"a" { return a; }) !{ return a !== "a"; }',
input: "a"
@ -1052,12 +1044,10 @@ describe("generated parser behavior", function() {
it("cannot access variables defined by subexpressions", function() {
var testcases = [
// TODO: Fix #396.
//
// {
// grammar: 'start = (a:"a") { return a; }',
// input: "a"
// },
{
grammar: 'start = (a:"a") { return a; }',
input: "a"
},
{
grammar: 'start = (a:"a")? { return a; }',
input: "a"
@ -1086,12 +1076,10 @@ describe("generated parser behavior", function() {
grammar: 'start = b:(a:"a") { return a; }',
input: "a"
},
// TODO: Fix #396.
//
// {
// grammar: 'start = ("a" b:"b" "c") { return b; }',
// input: "abc"
// },
{
grammar: 'start = ("a" b:"b" "c") { return b; }',
input: "abc"
},
{
grammar: 'start = (a:"a" { return a; }) { return a; }',
input: "a"

@ -357,6 +357,14 @@ describe("compiler pass |generateBytecode|", function() {
});
});
describe("for group", function() {
it("generates correct bytecode", function() {
expect(pass).toChangeAST('start = ("a")', bytecodeDetails([
18, 0, 2, 2, 22, 0, 23, 1 // <expression>
]));
});
});
describe("for semantic_and", function() {
describe("without labels", function() {
var grammar = 'start = &{ code }';

@ -69,6 +69,9 @@ describe("compiler pass |reportInfiniteLoops|", function() {
expect(pass).toReportError('start = (""+)*');
expect(pass).not.toReportError('start = ("a"+)*');
expect(pass).toReportError('start = ("")*');
expect(pass).not.toReportError('start = ("a")*');
expect(pass).toReportError('start = (&{ })*');
expect(pass).toReportError('start = (!{ })*');

@ -88,6 +88,9 @@ describe("compiler pass |reportLeftRecursion|", function() {
expect(pass).toReportError('start = ""+ start');
expect(pass).not.toReportError('start = "a"+ start');
expect(pass).toReportError('start = ("") start');
expect(pass).not.toReportError('start = ("a") start');
expect(pass).toReportError('start = &{ } start');
expect(pass).toReportError('start = !{ } start');

@ -33,6 +33,8 @@ describe("PEG.js grammar parser", function() {
type: "sequence",
elements: [labeledAbcd, labeledEfgh, labeledIjkl, labeledMnop]
},
groupLabeled = { type: "group", expression: labeledAbcd },
groupSequence = { type: "group", expression: sequence },
actionAbcd = { type: "action", expression: literalAbcd, code: " code " },
actionEfgh = { type: "action", expression: literalEfgh, code: " code " },
actionIjkl = { type: "action", expression: literalIjkl, code: " code " },
@ -162,6 +164,7 @@ describe("PEG.js grammar parser", function() {
optional: stripExpression,
zero_or_more: stripExpression,
one_or_more: stripExpression,
group: stripExpression,
semantic_and: stripLeaf,
semantic_not: stripLeaf,
rule_ref: stripLeaf,
@ -370,6 +373,9 @@ describe("PEG.js grammar parser", function() {
expect('start = .' ).toParseAs(anyGrammar());
expect('start = a' ).toParseAs(ruleRefGrammar("a"));
expect('start = &{ code }').toParseAs(oneRuleGrammar(semanticAnd));
expect('start = (\na:"abcd"\n)' ).toParseAs(oneRuleGrammar(groupLabeled));
expect('start = (\n"abcd" "efgh" "ijkl"\n)').toParseAs(oneRuleGrammar(groupSequence));
expect('start = (\n"abcd"\n)' ).toParseAs(trivialGrammar);
});

@ -194,7 +194,17 @@ PrimaryExpression
/ AnyMatcher
/ RuleReferenceExpression
/ SemanticPredicateExpression
/ "(" __ expression:Expression __ ")" { return expression; }
/ "(" __ expression:Expression __ ")" {
/*
* The purpose of the "group" AST node is just to isolate label scope. We
* don't need to put it around nodes that can't contain any labels or
* nodes that already isolate label scope themselves. This leaves us with
* "labeled" and "sequence".
*/
return expression.type === 'labeled' || expression.type === 'sequence'
? { type: "group", expression: expression }
: expression;
}
RuleReferenceExpression
= name:IdentifierName !(__ (StringLiteral __)? "=") {

Loading…
Cancel
Save