From a2af1fe61227e8801ae02eb71ae53e938776653f Mon Sep 17 00:00:00 2001 From: David Majda Date: Mon, 27 Feb 2012 20:18:21 +0100 Subject: [PATCH] Semantic predicates now have access to preceding labels Part of a fix for GH-69. --- README.md | 4 +-- src/emitter.js | 4 +-- src/passes.js | 38 +++++++++++++---------- test/compiler-test.js | 72 +++++++++++++++++++++++++++++++++++++++++++ test/passes-test.js | 14 ++++++++- 5 files changed, 110 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 08d9221..40987d1 100644 --- a/README.md +++ b/README.md @@ -168,13 +168,13 @@ Try to match the expression and. If the match does not succeed, just return an e #### & { *predicate* } -The predicate is a piece of JavaScript code that is executed as if it was inside a function. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `true` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed. +The predicate is a piece of JavaScript code that is executed as if it was inside a function. It gets the match results of labeled expressions in preceding expression as its arguments. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `true` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed. The code inside the predicate has access to all variables and functions defined in the initializer at the beginning of the grammar. Curly braces in the predicate code must be balanced. #### ! { *predicate* } -The predicate is a piece of JavaScript code that is executed as if it was inside a function. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `false` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed. 
+The predicate is a piece of JavaScript code that is executed as if it was inside a function. It gets the match results of labeled expressions in preceding expression as its arguments. It should return some JavaScript value using the `return` statement. If the returned value evaluates to `false` in boolean context, just return an empty string and do not advance the parser position; otherwise consider the match failed. The code inside the predicate has access to all variables and functions defined in the initializer at the beginning of the grammar. Curly braces in the predicate code must be balanced. diff --git a/src/emitter.js b/src/emitter.js index 12d11af..f800076 100644 --- a/src/emitter.js +++ b/src/emitter.js @@ -563,10 +563,10 @@ PEG.compiler.emitter = function(ast) { '}' ], semantic_and: [ - '#{node.resultVar} = (function() {#{node.code}})() ? "" : null;' + '#{node.resultVar} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{values(node.params).join(", ")}) ? "" : null;' ], semantic_not: [ - '#{node.resultVar} = (function() {#{node.code}})() ? null : "";' + '#{node.resultVar} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{values(node.params).join(", ")}) ? null : "";' ], optional: [ '#block emit(node.expression)', diff --git a/src/passes.js b/src/passes.js index 08878f6..afa872e 100644 --- a/src/passes.js +++ b/src/passes.js @@ -318,15 +318,16 @@ PEG.compiler.passes = { /* * This pass walks through the AST and tracks what labels are visible at each - * point. For "action" nodes it computes parameter names and values for the - * function used in generated code. (In the emitter, user's code is wrapped - * into a function that is immediately executed. Its parameter names - * correspond to visible labels and its parameter values to their captured - * values). Implicitly, this pass defines scoping rules for labels. + * point. 
For "action", "semantic_and" and "semantic_not" nodes it computes + * parameter names and values for the function used in generated code. (In the + * emitter, user's code is wrapped into a function that is immediately + * executed. Its parameter names correspond to visible labels and its + * parameter values to their captured values). Implicitly, this pass defines + * scoping rules for labels. * - * After running this pass, all "action" nodes will have a |params| property - * containing an object mapping parameter names to the expressions that will - * be used as their values. + * After running this pass, all "action", "semantic_and" and "semantic_not" + * nodes will have a |params| property containing an object mapping parameter + * names to the expressions that will be used as their values. */ computeParams: function(ast) { var envs = []; @@ -343,6 +344,15 @@ PEG.compiler.passes = { scoped(function() { compute(node.expression); }); } + function computeParams(node) { + var env = envs[envs.length - 1], params = {}, name; + + for (name in env) { + params[name] = env[name]; + } + node.params = params; + } + var compute = buildNodeVisitor({ grammar: function(node) { @@ -389,8 +399,8 @@ PEG.compiler.passes = { simple_and: computeForScopedExpression, simple_not: computeForScopedExpression, - semantic_and: nop, - semantic_not: nop, + semantic_and: computeParams, + semantic_not: computeParams, optional: computeForScopedExpression, zero_or_more: computeForScopedExpression, one_or_more: computeForScopedExpression, @@ -398,14 +408,8 @@ PEG.compiler.passes = { action: function(node) { scoped(function() { - var env = envs[envs.length - 1], params = {}, name; - compute(node.expression); - - for (name in env) { - params[name] = env[name]; - } - node.params = params; + computeParams(node); }); }, diff --git a/test/compiler-test.js b/test/compiler-test.js index 9e9ca86..81dfe37 100644 --- a/test/compiler-test.js +++ b/test/compiler-test.js @@ -68,6 +68,42 @@ test("semantic and", 
function() { var rejectingParser = PEG.buildParser('start = "a" &{ return false; } "b"'); doesNotParse(rejectingParser, "ab"); + + var singleElementUnlabeledParser = PEG.buildParser( + 'start = "a" &{ return arguments.length === 0; }' + ); + parses(singleElementUnlabeledParser, "a", ["a", ""]); + + var singleElementLabeledParser = PEG.buildParser( + 'start = a:"a" &{ return arguments.length === 1 && a === "a"; }' + ); + parses(singleElementLabeledParser, "a", ["a", ""]); + + var multiElementUnlabeledParser = PEG.buildParser( + 'start = "a" "b" "c" &{ return arguments.length === 0; }' + ); + parses(multiElementUnlabeledParser, "abc", ["a", "b", "c", ""]); + + var multiElementLabeledParser = PEG.buildParser([ + 'start = a:"a" "b" c:"c"', + ' &{ return arguments.length === 2 && a === "a" && c === "c"; }' + ].join("\n")); + parses(multiElementLabeledParser, "abc", ["a", "b", "c", ""]); + + var innerElementsUnlabeledParser = PEG.buildParser( + 'start = "a" ("b" "c" "d" &{ return arguments.length === 0; }) "e"' + ); + parses(innerElementsUnlabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]); + + var innerElementsLabeledParser = PEG.buildParser([ + 'start = "a"', + ' (', + ' b:"b" "c" d:"d"', + ' &{ return arguments.length === 2 && b === "b" && d === "d"; }', + ' )', + ' "e"' + ].join("\n")); + parses(innerElementsLabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]); }); test("semantic not", function() { @@ -76,6 +112,42 @@ test("semantic not", function() { var rejectingParser = PEG.buildParser('start = "a" !{ return true; } "b"'); doesNotParse(rejectingParser, "ab"); + + var singleElementUnlabeledParser = PEG.buildParser( + 'start = "a" !{ return arguments.length !== 0; }' + ); + parses(singleElementUnlabeledParser, "a", ["a", ""]); + + var singleElementLabeledParser = PEG.buildParser( + 'start = a:"a" !{ return arguments.length !== 1 || a !== "a"; }' + ); + parses(singleElementLabeledParser, "a", ["a", ""]); + + var multiElementUnlabeledParser = 
PEG.buildParser( + 'start = "a" "b" "c" !{ return arguments.length !== 0; }' + ); + parses(multiElementUnlabeledParser, "abc", ["a", "b", "c", ""]); + + var multiElementLabeledParser = PEG.buildParser([ + 'start = a:"a" "b" c:"c"', + ' !{ return arguments.length !== 2 || a !== "a" || c !== "c"; }' + ].join("\n")); + parses(multiElementLabeledParser, "abc", ["a", "b", "c", ""]); + + var innerElementsUnlabeledParser = PEG.buildParser( + 'start = "a" ("b" "c" "d" !{ return arguments.length !== 0; }) "e"' + ); + parses(innerElementsUnlabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]); + + var innerElementsLabeledParser = PEG.buildParser([ + 'start = "a"', + ' (', + ' b:"b" "c" d:"d"', + ' !{ return arguments.length !== 2 || b !== "b" || d !== "d"; }', + ' )', + ' "e"' + ].join("\n")); + parses(innerElementsLabeledParser, "abcde", ["a", ["b", "c", "d", ""], "e"]); }); test("optional expressions", function() { diff --git a/test/passes-test.js b/test/passes-test.js index 30dd5d7..2d5e403 100644 --- a/test/passes-test.js +++ b/test/passes-test.js @@ -447,12 +447,24 @@ test("computes params", function() { function extractExpression(node) { return node.expression; } var cases = [ - /* Recursive walk */ + /* Basics */ { grammar: 'start = a:"a" { }', extractor: extractNode, params: { a: "result0" } }, + { + grammar: 'start = a:"a" &{ }', + extractor: function(node) { return node.elements[1]; }, + params: { a: "result0" } + }, + { + grammar: 'start = a:"a" !{ }', + extractor: function(node) { return node.elements[1]; }, + params: { a: "result0" } + }, + + /* Recursive walk */ { grammar: 'start = a:"a" { } / "b" / "c"', extractor: function(node) { return node.alternatives[0]; },