From 4cf50bcf9f4fa62833d07e85b1948c7e7a056e49 Mon Sep 17 00:00:00 2001 From: David Majda Date: Mon, 27 Feb 2012 20:18:03 +0100 Subject: [PATCH] Move param computations from the emitter into a separate pass This has two main benefits: 1. The knowledge about scoping params is at one designated place, making all future adjustments in this area easier. 2. Action-related code does not handle sequences specially anymore. Such knowledge/behavior doesn't belong there. --- src/compiler.js | 3 +- src/emitter.js | 50 +++------------- src/passes.js | 102 ++++++++++++++++++++++++++++++++ src/utils.js | 16 +++++ test/passes-test.js | 140 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 267 insertions(+), 44 deletions(-) diff --git a/src/compiler.js b/src/compiler.js index c95baed..d473cc4 100644 --- a/src/compiler.js +++ b/src/compiler.js @@ -7,7 +7,8 @@ PEG.compiler = { "reportMissingRules", "reportLeftRecursion", "removeProxyRules", - "computeVarNames" + "computeVarNames", + "computeParams" ], /* diff --git a/src/emitter.js b/src/emitter.js index e9a741a..12d11af 100644 --- a/src/emitter.js +++ b/src/emitter.js @@ -596,7 +596,7 @@ PEG.compiler.emitter = function(ast) { '#{node.posVar} = pos;', '#block emit(node.expression)', 'if (#{node.resultVar} !== null) {', - ' #{node.resultVar} = (function(#{formalParams.join(", ")}) {#{node.code}})(#{actualParams.join(", ")});', + ' #{node.resultVar} = (function(#{keys(node.params).join(", ")}) {#{node.code}})(#{values(node.params).join(", ")});', '}', 'if (#{node.resultVar} === null) {', ' pos = #{node.posVar};', @@ -674,6 +674,8 @@ PEG.compiler.emitter = function(ast) { function fill(name, vars) { vars.string = quote; vars.pluck = pluck; + vars.keys = keys; + vars.values = values; vars.emit = emit; return templates[name](vars); @@ -770,48 +772,10 @@ PEG.compiler.emitter = function(ast) { optional: emitSimple("optional"), zero_or_more: emitSimple("zero_or_more"), one_or_more: emitSimple("one_or_more"), - - action: 
function(node) { - /* - * In case of sequences, we splat their elements into function arguments - * one by one. Example: - * - * start: a:"a" b:"b" c:"c" { alert(arguments.length) } // => 3 - * - * This behavior is reflected in this function. - */ - - var formalParams; - var actualParams; - - if (node.expression.type === "sequence") { - formalParams = []; - actualParams = []; - - each(node.expression.elements, function(element, i) { - if (element.type === "labeled") { - formalParams.push(element.label); - actualParams.push(node.resultVar + '[' + i + ']'); - } - }); - } else if (node.expression.type === "labeled") { - formalParams = [node.expression.label]; - actualParams = [node.resultVar]; - } else { - formalParams = []; - actualParams = []; - } - - return fill("action", { - node: node, - formalParams: formalParams, - actualParams: actualParams - }); - }, - - rule_ref: emitSimple("rule_ref"), - literal: emitSimple("literal"), - any: emitSimple("any"), + action: emitSimple("action"), + rule_ref: emitSimple("rule_ref"), + literal: emitSimple("literal"), + any: emitSimple("any"), "class": function(node) { var regexp; diff --git a/src/passes.js b/src/passes.js index ad2bdba..08878f6 100644 --- a/src/passes.js +++ b/src/passes.js @@ -314,5 +314,107 @@ PEG.compiler.passes = { }); compute(ast, { result: 0, pos: 0 }); + }, + + /* + * This pass walks through the AST and tracks what labels are visible at each + * point. For "action" nodes it computes parameter names and values for the + * function used in generated code. (In the emitter, user's code is wrapped + * into a function that is immediately executed. Its parameter names + * correspond to visible labels and its parameter values to their captured + * values). Implicitly, this pass defines scoping rules for labels. + * + * After running this pass, all "action" nodes will have a |params| property + * containing an object mapping parameter names to the expressions that will + * be used as their values. 
+ */ + computeParams: function(ast) { + var envs = []; + + function scoped(f) { + envs.push({}); + f(); + envs.pop(); + } + + function nop() {} + + function computeForScopedExpression(node) { + scoped(function() { compute(node.expression); }); + } + + var compute = buildNodeVisitor({ + grammar: + function(node) { + var name; + + for (name in node.rules) { + compute(node.rules[name]); + } + }, + + rule: computeForScopedExpression, + + choice: + function(node) { + scoped(function() { each(node.alternatives, compute); }); + }, + + sequence: + function(node) { + var env = envs[envs.length - 1], name; + + function fixup(name) { + each(pluck(node.elements, "resultVar"), function(resultVar, i) { + if (env[name].substr(0, resultVar.length) === resultVar) { + env[name] = node.resultVar + "[" + i + "]" + + env[name].substr(resultVar.length); + } + }); + } + + each(node.elements, compute); + + for (name in env) { + fixup(name); + } + }, + + labeled: + function(node) { + envs[envs.length - 1][node.label] = node.resultVar; + + scoped(function() { compute(node.expression); }); + }, + + simple_and: computeForScopedExpression, + simple_not: computeForScopedExpression, + semantic_and: nop, + semantic_not: nop, + optional: computeForScopedExpression, + zero_or_more: computeForScopedExpression, + one_or_more: computeForScopedExpression, + + action: + function(node) { + scoped(function() { + var env = envs[envs.length - 1], params = {}, name; + + compute(node.expression); + + for (name in env) { + params[name] = env[name]; + } + node.params = params; + }); + }, + + rule_ref: nop, + literal: nop, + any: nop, + "class": nop + }); + + compute(ast); } }; diff --git a/src/utils.js b/src/utils.js index 3c76570..4935894 100644 --- a/src/utils.js +++ b/src/utils.js @@ -46,6 +46,22 @@ function pluck(array, key) { return map(array, function (e) { return e[key]; }); } +function keys(object) { + var result = []; + for (var key in object) { + result.push(key); + } + return result; +} + +function 
values(object) { + var result = []; + for (var key in object) { + result.push(object[key]); + } + return result; +} + /* * Returns a string padded on the left to a desired length with a character. * diff --git a/test/passes-test.js b/test/passes-test.js index fb30472..30dd5d7 100644 --- a/test/passes-test.js +++ b/test/passes-test.js @@ -442,4 +442,144 @@ test("computes variable names", function() { } }); +test("computes params", function() { + function extractNode(node) { return node; } + function extractExpression(node) { return node.expression; } + + var cases = [ + /* Recursive walk */ + { + grammar: 'start = a:"a" { }', + extractor: extractNode, + params: { a: "result0" } + }, + { + grammar: 'start = a:"a" { } / "b" / "c"', + extractor: function(node) { return node.alternatives[0]; }, + params: { a: "result0" } + }, + { + grammar: 'start = "a" / "b" / c:"c" { }', + extractor: function(node) { return node.alternatives[2]; }, + params: { c: "result0" } + }, + { + grammar: 'start = (a:"a" { }) "b" "c"', + extractor: function(node) { return node.elements[0]; }, + params: { a: "result0" } + }, + { + grammar: 'start = "a" "b" (c:"c" { })', + extractor: function(node) { return node.elements[2]; }, + params: { c: "result2" } + }, + { + grammar: 'start = a:(b:"b" { })', + extractor: extractExpression, + params: { b: "result0" } + }, + { + grammar: 'start = &(a:"a" { })', + extractor: extractExpression, + params: { a: "result0" } + }, + { + grammar: 'start = !(a:"a" { })', + extractor: extractExpression, + params: { a: "result0" } + }, + { + grammar: 'start = (a:"a" { })?', + extractor: extractExpression, + params: { a: "result0" } + }, + { + grammar: 'start = (a:"a" { })*', + extractor: extractExpression, + params: { a: "result1" } + }, + { + grammar: 'start = (a:"a" { })+', + extractor: extractExpression, + params: { a: "result1" } + }, + { + grammar: 'start = (a:"a" { }) { }', + extractor: extractExpression, + params: { a: "result0" } + }, + + /* Scoping */ + { + 
grammar: 'start = (a:"a" / b:"b" / c:"c") { }', + extractor: extractNode, + params: { } + }, + { + grammar: 'start = a:(b:"b") { }', + extractor: extractNode, + params: { a: "result0" } + }, + { + grammar: 'start = &(a:"a") { }', + extractor: extractNode, + params: { } + }, + { + grammar: 'start = !(a:"a") { }', + extractor: extractNode, + params: { } + }, + { + grammar: 'start = (a:"a")? { }', + extractor: extractNode, + params: { } + }, + { + grammar: 'start = (a:"a")* { }', + extractor: extractNode, + params: { } + }, + { + grammar: 'start = (a:"a")+ { }', + extractor: extractNode, + params: { } + }, + { + grammar: 'start = (a:"a" { }) { }', + extractor: extractNode, + params: { } + }, + + /* Sequences */ + { + grammar: 'start = a:"a" b:"b" c:"c" { }', + extractor: extractNode, + params: { a: "result0[0]", b: "result0[1]", c: "result0[2]" } + }, + { + grammar: 'start = a:"a" (b:"b" c:"c" d:"d") e:"e"{ }', + extractor: extractNode, + params: { + a: "result0[0]", + b: "result0[1][0]", + c: "result0[1][1]", + d: "result0[1][2]", + e: "result0[2]" + } + } + ]; + + for (var i = 0; i < cases.length; i++) { + var ast = PEG.parser.parse(cases[i].grammar); + PEG.compiler.passes.computeVarNames(ast); + PEG.compiler.passes.computeParams(ast); + + deepEqual( + cases[i].extractor(ast.rules["start"].expression).params, + cases[i].params + ); + } +}); + })();