From 4d5b1d58aa9b2d8abbf6ed88a1387e5b765ebccc Mon Sep 17 00:00:00 2001 From: David Majda Date: Sun, 4 Mar 2012 17:29:32 +0100 Subject: [PATCH] AST: Store rules in an array instead of an object This simplifies the code a bit and makes the AST more regular (each node type has a fixed set of properties). The latter may get useful later when generalizing visitors. --- src/emitter.js | 29 +++----------- src/parser.js | 93 ++++++++++++++++++++++----------------------- src/parser.pegjs | 5 +-- src/passes.js | 78 ++++++++++++++++--------------------- src/utils.js | 13 +++++++ test/parser-test.js | 22 +++++------ test/passes-test.js | 18 ++++----- 7 files changed, 117 insertions(+), 141 deletions(-) diff --git a/src/emitter.js b/src/emitter.js index 1012fe1..ada2092 100644 --- a/src/emitter.js +++ b/src/emitter.js @@ -281,7 +281,9 @@ PEG.compiler.emitter = function(ast) { ' */', ' parse: function(input, startRule) {', ' var parseFunctions = {', - ' #block parseFunctionTableItems.join(",\\n")', + ' #for rule in node.rules', + ' #{string(rule.name) + ": parse_" + rule.name},', + ' #end', ' };', ' ', ' if (startRule !== undefined) {', @@ -340,8 +342,8 @@ PEG.compiler.emitter = function(ast) { ' rightmostFailuresExpected.push(failure);', ' }', ' ', - ' #for definition in parseFunctionDefinitions', - ' #block definition', + ' #for rule in node.rules', + ' #block emit(rule)', ' ', ' #end', ' ', @@ -686,26 +688,7 @@ PEG.compiler.emitter = function(ast) { } var emit = buildNodeVisitor({ - grammar: function(node) { - var name; - - var parseFunctionTableItems = []; - for (name in node.rules) { - parseFunctionTableItems.push(quote(name) + ": parse_" + name); - } - parseFunctionTableItems.sort(); - - var parseFunctionDefinitions = []; - for (name in node.rules) { - parseFunctionDefinitions.push(emit(node.rules[name])); - } - - return fill("grammar", { - node: node, - parseFunctionTableItems: parseFunctionTableItems, - parseFunctionDefinitions: parseFunctionDefinitions - }); - }, + grammar: emitSimple("grammar"), initializer: function(node) { return node.code; }, diff --git a/src/parser.js b/src/parser.js index fa1cb56..d76f4ab 100644 --- a/src/parser.js +++ b/src/parser.js @@ -33,62 +33,62 @@ PEG.parser = (function(){ */ parse: function(input, startRule) { var parseFunctions = { - "__": parse___, - "action": parse_action, - "and": parse_and, - "braced": parse_braced, - "bracketDelimitedCharacter": parse_bracketDelimitedCharacter, - "choice": parse_choice, - "class": parse_class, - "classCharacter": parse_classCharacter, - "classCharacterRange": parse_classCharacterRange, - "colon": parse_colon, - "comment": parse_comment, - "digit": parse_digit, - "dot": parse_dot, - "doubleQuotedCharacter": parse_doubleQuotedCharacter, - "doubleQuotedString": parse_doubleQuotedString, - "eol": parse_eol, - "eolChar": parse_eolChar, - "eolEscapeSequence": parse_eolEscapeSequence, - "equals": parse_equals, "grammar": parse_grammar, - "hexDigit": parse_hexDigit, - "hexEscapeSequence": parse_hexEscapeSequence, - "identifier": parse_identifier, "initializer": parse_initializer, + "rule": parse_rule, + "choice": parse_choice, + "sequence": parse_sequence, "labeled": parse_labeled, - "letter": parse_letter, - "literal": parse_literal, - "lowerCaseLetter": parse_lowerCaseLetter, - "lparen": parse_lparen, - "multiLineComment": parse_multiLineComment, - "nonBraceCharacter": parse_nonBraceCharacter, - "nonBraceCharacters": parse_nonBraceCharacters, - "not": parse_not, - "plus": parse_plus, "prefixed": parse_prefixed, + "suffixed": parse_suffixed, "primary": parse_primary, - "question": parse_question, - "rparen": parse_rparen, - "rule": parse_rule, + "action": parse_action, + "braced": parse_braced, + "nonBraceCharacters": parse_nonBraceCharacters, + "nonBraceCharacter": parse_nonBraceCharacter, + "equals": parse_equals, + "colon": parse_colon, "semicolon": parse_semicolon, - "sequence": parse_sequence, - "simpleBracketDelimitedCharacter": parse_simpleBracketDelimitedCharacter, - "simpleDoubleQuotedCharacter": parse_simpleDoubleQuotedCharacter, - "simpleEscapeSequence": parse_simpleEscapeSequence, - "simpleSingleQuotedCharacter": parse_simpleSingleQuotedCharacter, - "singleLineComment": parse_singleLineComment, - "singleQuotedCharacter": parse_singleQuotedCharacter, - "singleQuotedString": parse_singleQuotedString, "slash": parse_slash, + "and": parse_and, + "not": parse_not, + "question": parse_question, "star": parse_star, + "plus": parse_plus, + "lparen": parse_lparen, + "rparen": parse_rparen, + "dot": parse_dot, + "identifier": parse_identifier, + "literal": parse_literal, "string": parse_string, - "suffixed": parse_suffixed, + "doubleQuotedString": parse_doubleQuotedString, + "doubleQuotedCharacter": parse_doubleQuotedCharacter, + "simpleDoubleQuotedCharacter": parse_simpleDoubleQuotedCharacter, + "singleQuotedString": parse_singleQuotedString, + "singleQuotedCharacter": parse_singleQuotedCharacter, + "simpleSingleQuotedCharacter": parse_simpleSingleQuotedCharacter, + "class": parse_class, + "classCharacterRange": parse_classCharacterRange, + "classCharacter": parse_classCharacter, + "bracketDelimitedCharacter": parse_bracketDelimitedCharacter, + "simpleBracketDelimitedCharacter": parse_simpleBracketDelimitedCharacter, + "simpleEscapeSequence": parse_simpleEscapeSequence, + "zeroEscapeSequence": parse_zeroEscapeSequence, + "hexEscapeSequence": parse_hexEscapeSequence, "unicodeEscapeSequence": parse_unicodeEscapeSequence, + "eolEscapeSequence": parse_eolEscapeSequence, + "digit": parse_digit, + "hexDigit": parse_hexDigit, + "letter": parse_letter, + "lowerCaseLetter": parse_lowerCaseLetter, "upperCaseLetter": parse_upperCaseLetter, + "__": parse___, + "comment": parse_comment, + "singleLineComment": parse_singleLineComment, + "multiLineComment": parse_multiLineComment, + "eol": parse_eol, + "eolChar": parse_eolChar, "whitespace": parse_whitespace, - "zeroEscapeSequence": parse_zeroEscapeSequence }; if (startRule !== undefined) { @@ -189,13 +189,10 @@ PEG.parser = (function(){ } if (result0 !== null) { result0 = (function(initializer, rules) { - var rulesConverted = {}; - each(rules, function(rule) { rulesConverted[rule.name] = rule; }); - return { type: "grammar", initializer: initializer !== "" ? initializer : null, - rules: rulesConverted, + rules: rules, startRule: rules[0].name }; })(result0[1], result0[2]); diff --git a/src/parser.pegjs b/src/parser.pegjs index fe199d2..6b3c77b 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -1,12 +1,9 @@ grammar = __ initializer:initializer? rules:rule+ { - var rulesConverted = {}; - each(rules, function(rule) { rulesConverted[rule.name] = rule; }); - return { type: "grammar", initializer: initializer !== "" ? initializer : null, - rules: rulesConverted, + rules: rules, startRule: rules[0].name }; } diff --git a/src/passes.js b/src/passes.js index afa872e..4cd19cf 100644 --- a/src/passes.js +++ b/src/passes.js @@ -17,13 +17,7 @@ PEG.compiler.passes = { } var check = buildNodeVisitor({ - grammar: - function(node) { - for (var name in node.rules) { - check(node.rules[name]); - } - }, - + grammar: checkSubnodes("rules"), rule: checkExpression, choice: checkSubnodes("alternatives"), sequence: checkSubnodes("elements"), @@ -39,7 +33,7 @@ PEG.compiler.passes = { rule_ref: function(node) { - if (ast.rules[node.name] === undefined) { + if (!findRuleByName(ast, node.name)) { throw new PEG.GrammarError( "Referenced rule \"" + node.name + "\" does not exist." ); @@ -62,25 +56,23 @@ PEG.compiler.passes = { check(node.expression, appliedRules); } + function checkSubnodes(propertyName) { + return function(node, appliedRules) { + each(node[propertyName], function(subnode) { + check(subnode, appliedRules); + }); + }; + } + var check = buildNodeVisitor({ - grammar: - function(node, appliedRules) { - for (var name in node.rules) { - check(node.rules[name], appliedRules); - } - }, + grammar: checkSubnodes("rules"), rule: function(node, appliedRules) { check(node.expression, appliedRules.concat(node.name)); }, - choice: - function(node, appliedRules) { - each(node.alternatives, function(alternative) { - check(alternative, appliedRules); - }); - }, + choice: checkSubnodes("alternatives"), sequence: function(node, appliedRules) { @@ -106,7 +98,7 @@ PEG.compiler.passes = { "Left recursion detected for rule \"" + node.name + "\"." ); } - check(ast.rules[node.name], appliedRules); + check(findRuleByName(ast, node.name), appliedRules); }, literal: nop, @@ -141,13 +133,7 @@ PEG.compiler.passes = { } var replace = buildNodeVisitor({ - grammar: - function(node, from, to) { - for (var name in node.rules) { - replace(node.rules[name], from, to); - } - }, - + grammar: replaceInSubnodes("rules"), rule: replaceInExpression, choice: replaceInSubnodes("alternatives"), sequence: replaceInSubnodes("elements"), @@ -176,15 +162,23 @@ PEG.compiler.passes = { replace(ast, from, to); } - for (var name in ast.rules) { - if (isProxyRule(ast.rules[name])) { - replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name); - if (name === ast.startRule) { - ast.startRule = ast.rules[name].expression.name; + var indices = []; + + each(ast.rules, function(rule, i) { + if (isProxyRule(rule)) { + replaceRuleRefs(ast, rule.name, rule.expression.name); + if (rule.name === ast.startRule) { + ast.startRule = rule.expression.name; } - delete ast.rules[name]; + indices.push(i); } - } + }); + + indices.reverse(); + + each(indices, function(index) { + ast.rules.splice(index, 1); + }); }, /* @@ -240,11 +234,9 @@ PEG.compiler.passes = { var compute = buildNodeVisitor({ grammar: function(node, index) { - var name; - - for (name in node.rules) { - compute(node.rules[name], index); - } + each(node.rules, function(node) { + compute(node, index); + }); }, rule: @@ -356,11 +348,7 @@ PEG.compiler.passes = { var compute = buildNodeVisitor({ grammar: function(node) { - var name; - - for (name in node.rules) { - compute(node.rules[name]); - } + each(node.rules, compute); }, rule: computeForScopedExpression, diff --git a/src/utils.js b/src/utils.js index 4935894..e90faa9 100644 --- a/src/utils.js +++ b/src/utils.js @@ -12,6 +12,15 @@ function range(start, stop) { return result; } +function find(array, callback) { + var length = array.length; + for (var i = 0; i < length; i++) { + if (callback(array[i])) { + return array[i]; + } + } +} + function contains(array, value) { /* * Stupid IE does not have Array.prototype.indexOf, otherwise this function @@ -168,3 +177,7 @@ function buildNodeVisitor(functions) { return functions[node.type].apply(null, arguments); }; } + +function findRuleByName(ast, name) { + return find(ast.rules, function(r) { return r.name === name; }); +} diff --git a/test/parser-test.js b/test/parser-test.js index de9b5ae..5452879 100644 --- a/test/parser-test.js +++ b/test/parser-test.js @@ -125,7 +125,7 @@ function oneRuleGrammar(expression) { return { type: "grammar", initializer: null, - rules: { start: rule("start", null, expression) }, + rules: [rule("start", null, expression)], startRule: "start" }; } @@ -155,16 +155,14 @@ function actionGrammar(action) { var initializerGrammar = { type: "grammar", initializer: initializer(" code "), - rules: { - a: rule("a", null, literalAbcd) - }, + rules: [rule("a", null, literalAbcd)], startRule: "a" }; var namedRuleGrammar = { type: "grammar", initializer: null, - rules: { start: rule("start", "abcd", literalAbcd) }, + rules: [rule("start", "abcd", literalAbcd)], startRule: "start" }; @@ -175,7 +173,7 @@ test("parses grammar", function() { { type: "grammar", initializer: null, - rules: { a: rule("a", null, literalAbcd) }, + rules: [rule("a", null, literalAbcd)], startRule: "a" } ); @@ -185,11 +183,11 @@ test("parses grammar", function() { { type: "grammar", initializer: null, - rules: { - a: rule("a", null, literalAbcd), - b: rule("b", null, literalEfgh), - c: rule("c", null, literalIjkl) - }, + rules: [ + rule("a", null, literalAbcd), + rule("b", null, literalEfgh), + rule("c", null, literalIjkl) + ], startRule: "a" } ); @@ -212,7 +210,7 @@ test("parses rule", function() { { type: "grammar", initializer: null, - rules: { start: rule("start", "start rule", choiceLiterals) }, + rules: [rule("start", "start rule", choiceLiterals)], startRule: "start" } ); diff --git a/test/passes-test.js b/test/passes-test.js index 2d5e403..46ab528 100644 --- a/test/passes-test.js +++ b/test/passes-test.js @@ -93,15 +93,15 @@ test("removes proxy rules", function() { function simpleGrammarWithStartAndProxied(startRuleExpression) { return simpleGrammar( - { - start: { + [ + { type: "rule", name: "start", displayName: null, expression: startRuleExpression }, - proxied: proxiedRule - }, + proxiedRule + ], "start" ); } @@ -109,7 +109,7 @@ test("removes proxy rules", function() { var cases = [ { grammar: 'start = proxy; proxy = proxied; proxied = "a"', - ast: simpleGrammar({ proxied: proxiedRule }, "proxied") + ast: simpleGrammar([proxiedRule], "proxied") }, { grammar: 'start = proxy / "a" / "b"; proxy = proxied; proxied = "a"', @@ -436,9 +436,9 @@ test("computes variable names", function() { var ast = PEG.parser.parse(cases[i].grammar); PEG.compiler.passes.computeVarNames(ast); - deepEqual(ast.rules["start"].resultVars, cases[i].resultVars); - deepEqual(ast.rules["start"].posVars, cases[i].posVars); - checkDetails(ast.rules["start"].expression, cases[i].details); + deepEqual(ast.rules[0].resultVars, cases[i].resultVars); + deepEqual(ast.rules[0].posVars, cases[i].posVars); + checkDetails(ast.rules[0].expression, cases[i].details); } }); @@ -588,7 +588,7 @@ test("computes params", function() { PEG.compiler.passes.computeParams(ast); deepEqual( - cases[i].extractor(ast.rules["start"].expression).params, + cases[i].extractor(ast.rules[0].expression).params, cases[i].params ); }