diff --git a/lib/compiler.js b/lib/compiler.js index e6f4288..3ec6bad 100644 --- a/lib/compiler.js +++ b/lib/compiler.js @@ -240,15 +240,252 @@ PEG.Compiler = { this._uniqueIdentifierCounters = {}; }, - _compileFunctions: { + /* + * Generates a parser from a specified grammar AST. Throws |PEG.GrammarError| + * if the AST contains a semantic error. Note that not all errors are detected + * during the generation and some may protrude to the generated parser and + * cause its malfunction. + */ + compileParser: function(ast) { + for (var i = 0; i < this.checks.length; i++) { + this.checks[i](ast); + } + + for (var i = 0; i < this.passes.length; i++) { + ast = this.passes[i](ast); + } + + var source = this.emitter(ast); + var result = eval(source); + result._source = source; + + return result; + } +}; + +/* + * Checks made on the grammar AST before compilation. Each check is a function + * that is passed the AST and does not return anything. If the check passes, the + * function does not do anything special, otherwise it throws + * |PEG.GrammarError|. The checks are run in sequence in order of their + * definition. + */ +PEG.Compiler.checks = [ + /* Checks that all referenced rules exist. */ + function(ast) { + function nop() {} + + function checkExpression(node) { check(node.expression); } + + function checkSubnodes(propertyName) { + return function(node) { + PEG.ArrayUtils.each(node[propertyName], check); + }; + } + + var checkFunctions = { + grammar: + function(node) { + for (var name in node.rules) { + check(node.rules[name]); + } + }, + + rule: checkExpression, + choice: checkSubnodes("alternatives"), + sequence: checkSubnodes("elements"), + labeled: checkExpression, + simple_and: checkExpression, + simple_not: checkExpression, + semantic_and: nop, + semantic_not: nop, + optional: checkExpression, + zero_or_more: checkExpression, + one_or_more: checkExpression, + action: checkExpression, + + rule_ref: + function(node) { + if (ast.rules[node.name] === undefined) { + throw new PEG.GrammarError( + "Referenced rule \"" + node.name + "\" does not exist." + ); + } + }, + + literal: nop, + any: nop, + "class": nop + }; + + function check(node) { checkFunctions[node.type](node); } + + check(ast); + }, + + /* Checks that no left recursion is present. */ + function(ast) { + function nop() {} + + function checkExpression(node, appliedRules) { + check(node.expression, appliedRules); + } + + var checkFunctions = { + grammar: + function(node, appliedRules) { + for (var name in node.rules) { + check(ast.rules[name], appliedRules); + } + }, + + rule: + function(node, appliedRules) { + check(node.expression, appliedRules.concat(node.name)); + }, + + choice: + function(node, appliedRules) { + PEG.ArrayUtils.each(node.alternatives, function(alternative) { + check(alternative, appliedRules); + }); + }, + + sequence: + function(node, appliedRules) { + if (node.elements.length > 0) { + check(node.elements[0], appliedRules); + } + }, + + labeled: checkExpression, + simple_and: checkExpression, + simple_not: checkExpression, + semantic_and: nop, + semantic_not: nop, + optional: checkExpression, + zero_or_more: checkExpression, + one_or_more: checkExpression, + action: checkExpression, + + rule_ref: + function(node, appliedRules) { + if (PEG.ArrayUtils.contains(appliedRules, node.name)) { + throw new PEG.GrammarError( + "Left recursion detected for rule \"" + node.name + "\"." + ); + } + check(ast.rules[node.name], appliedRules); + }, + + literal: nop, + any: nop, + "class": nop + }; + + function check(node, appliedRules) { + checkFunctions[node.type](node, appliedRules); + } + + check(ast, []); + } +]; + +/* + * Optimalization passes made on the grammar AST before compilation. Each pass + * is a function that is passed the AST and returns a new AST. The AST can be + * modified in-place by the pass. The passes are run in sequence in order of + * their definition. + */ +PEG.Compiler.passes = [ + /* + * Removes proxy rules -- that is, rules that only delegate to other rule. + */ + function(ast) { + function isProxyRule(node) { + return node.type === "rule" && node.expression.type === "rule_ref"; + } + + function replaceRuleRefs(ast, from, to) { + function nop() {} + + function replaceInExpression(node, from, to) { + replace(node.expression, from, to); + } + + function replaceInSubnodes(propertyName) { + return function(node, from, to) { + PEG.ArrayUtils.each(node[propertyName], function(node) { + replace(node, from, to); + }); + }; + } + + var replaceFunctions = { + grammar: + function(node, from, to) { + for (var name in node.rules) { + replace(ast.rules[name], from, to); + } + }, + + rule: replaceInExpression, + choice: replaceInSubnodes("alternatives"), + sequence: replaceInSubnodes("elements"), + labeled: replaceInExpression, + simple_and: replaceInExpression, + simple_not: replaceInExpression, + semantic_and: nop, + semantic_not: nop, + optional: replaceInExpression, + zero_or_more: replaceInExpression, + one_or_more: replaceInExpression, + action: replaceInExpression, + + rule_ref: + function(node, from, to) { + if (node.name === from) { + node.name = to; + } + }, + + literal: nop, + any: nop, + "class": nop + }; + + function replace(node, from, to) { + replaceFunctions[node.type](node, from, to); + } + + replace(ast, from, to); + } + + for (var name in ast.rules) { + if (isProxyRule(ast.rules[name])) { + replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name); + if (name === ast.startRule) { + ast.startRule = ast.rules[name].expression.name; + } + delete ast.rules[name]; + } + } + + return ast; + } +]; + +/* Emits the generated code for the AST. */ +PEG.Compiler.emitter = function(ast) { + var emitFunctions = { grammar: function(node) { var initializerCode = node.initializer !== null - ? PEG.Compiler.compileNode(node.initializer) + ? emit(node.initializer) : ""; var parseFunctionDefinitions = []; for (var name in node.rules) { - parseFunctionDefinitions.push(PEG.Compiler.compileNode(node.rules[name])); + parseFunctionDefinitions.push(emit(node.rules[name])); } return PEG.Compiler.formatCode( @@ -502,7 +739,7 @@ PEG.Compiler = { setReportMatchFailuresCode: setReportMatchFailuresCode, restoreReportMatchFailuresCode: restoreReportMatchFailuresCode, reportMatchFailureCode: reportMatchFailureCode, - code: PEG.Compiler.compileNode(node.expression, resultVar), + code: emit(node.expression, resultVar), resultVar: resultVar } ); @@ -540,7 +777,7 @@ PEG.Compiler = { " ${code};", "}", { - alternativeCode: PEG.Compiler.compileNode(node.alternatives[i], alternativeResultVar), + alternativeCode: emit(node.alternatives[i], alternativeResultVar), alternativeResultVar: alternativeResultVar, code: code, resultVar: resultVar @@ -576,7 +813,7 @@ PEG.Compiler = { " pos = ${savedPosVar};", "}", { - elementCode: PEG.Compiler.compileNode(node.elements[i], elementResultVars[i]), + elementCode: emit(node.elements[i], elementResultVars[i]), elementResultVar: elementResultVars[i], code: code, savedPosVar: savedPosVar, @@ -596,7 +833,7 @@ PEG.Compiler = { }, labeled: function(node, resultVar) { - return PEG.Compiler.compileNode(node.expression, resultVar); + return emit(node.expression, resultVar); }, simple_and: function(node, resultVar) { @@ -617,7 +854,7 @@ PEG.Compiler = { " var ${resultVar} = null;", "}", { - expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar), + expressionCode: emit(node.expression, expressionResultVar), expressionResultVar: expressionResultVar, savedPosVar: savedPosVar, savedReportMatchFailuresVar: savedReportMatchFailuresVar, @@ -644,7 +881,7 @@ PEG.Compiler = { " pos = ${savedPosVar};", "}", { - expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar), + expressionCode: emit(node.expression, expressionResultVar), expressionResultVar: expressionResultVar, savedPosVar: savedPosVar, savedReportMatchFailuresVar: savedReportMatchFailuresVar, @@ -684,7 +921,7 @@ PEG.Compiler = { "${expressionCode}", "var ${resultVar} = ${expressionResultVar} !== null ? ${expressionResultVar} : '';", { - expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar), + expressionCode: emit(node.expression, expressionResultVar), expressionResultVar: expressionResultVar, resultVar: resultVar } @@ -702,7 +939,7 @@ PEG.Compiler = { " ${expressionCode}", "}", { - expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar), + expressionCode: emit(node.expression, expressionResultVar), expressionResultVar: expressionResultVar, resultVar: resultVar } @@ -724,7 +961,7 @@ PEG.Compiler = { " var ${resultVar} = null;", "}", { - expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar), + expressionCode: emit(node.expression, expressionResultVar), expressionResultVar: expressionResultVar, resultVar: resultVar } @@ -769,7 +1006,7 @@ PEG.Compiler = { " ? (function(${formalParams}) {${actionCode}})(${actualParams})", " : null;", { - expressionCode: PEG.Compiler.compileNode(node.expression, expressionResultVar), + expressionCode: emit(node.expression, expressionResultVar), expressionResultVar: expressionResultVar, actionCode: node.code, formalParams: formalParams.join(", "), @@ -860,250 +1097,13 @@ PEG.Compiler = { } ); } - }, + }; - /* - * Compiles an AST node and returns the generated code. The |resultVar| - * parameter contains a name of variable in which the match result will be - * stored in the generated code. - */ - compileNode: function(node, resultVar) { - return this._compileFunctions[node.type](node, resultVar); - }, - - /* - * Generates a parser from a specified grammar AST. Throws |PEG.GrammarError| - * if the AST contains a semantic error. Note that not all errors are detected - * during the generation and some may protrude to the generated parser and - * cause its malfunction. - */ - compileParser: function(ast) { - for (var i = 0; i < this.checks.length; i++) { - this.checks[i](ast); - } - - for (var i = 0; i < this.passes.length; i++) { - ast = this.passes[i](ast); - } - - var source = this.compileNode(ast); - var result = eval(source); - result._source = source; - - return result; + function emit(node, resultVar) { + return emitFunctions[node.type](node, resultVar); } -}; - -/* - * Checks made on the grammar AST before compilation. Each check is a function - * that is passed the AST and does not return anything. If the check passes, the - * function does not do anything special, otherwise it throws - * |PEG.GrammarError|. The checks are run in sequence in order of their - * definition. - */ -PEG.Compiler.checks = [ - /* Checks that all referenced rules exist. */ - function(ast) { - function nop() {} - - function checkExpression(node) { check(node.expression); } - - function checkSubnodes(propertyName) { - return function(node) { - PEG.ArrayUtils.each(node[propertyName], check); - }; - } - - var checkFunctions = { - grammar: - function(node) { - for (var name in node.rules) { - check(node.rules[name]); - } - }, - - rule: checkExpression, - choice: checkSubnodes("alternatives"), - sequence: checkSubnodes("elements"), - labeled: checkExpression, - simple_and: checkExpression, - simple_not: checkExpression, - semantic_and: nop, - semantic_not: nop, - optional: checkExpression, - zero_or_more: checkExpression, - one_or_more: checkExpression, - action: checkExpression, - - rule_ref: - function(node) { - if (ast.rules[node.name] === undefined) { - throw new PEG.GrammarError( - "Referenced rule \"" + node.name + "\" does not exist." - ); - } - }, - - literal: nop, - any: nop, - "class": nop - }; - - function check(node) { checkFunctions[node.type](node); } - - check(ast); - }, - - /* Checks that no left recursion is present. */ - function(ast) { - function nop() {} - - function checkExpression(node, appliedRules) { - check(node.expression, appliedRules); - } - - var checkFunctions = { - grammar: - function(node, appliedRules) { - for (var name in node.rules) { - check(ast.rules[name], appliedRules); - } - }, - - rule: - function(node, appliedRules) { - check(node.expression, appliedRules.concat(node.name)); - }, - - choice: - function(node, appliedRules) { - PEG.ArrayUtils.each(node.alternatives, function(alternative) { - check(alternative, appliedRules); - }); - }, - - sequence: - function(node, appliedRules) { - if (node.elements.length > 0) { - check(node.elements[0], appliedRules); - } - }, - labeled: checkExpression, - simple_and: checkExpression, - simple_not: checkExpression, - semantic_and: nop, - semantic_not: nop, - optional: checkExpression, - zero_or_more: checkExpression, - one_or_more: checkExpression, - action: checkExpression, - - rule_ref: - function(node, appliedRules) { - if (PEG.ArrayUtils.contains(appliedRules, node.name)) { - throw new PEG.GrammarError( - "Left recursion detected for rule \"" + node.name + "\"." - ); - } - check(ast.rules[node.name], appliedRules); - }, - - literal: nop, - any: nop, - "class": nop - }; - - function check(node, appliedRules) { - checkFunctions[node.type](node, appliedRules); - } - - check(ast, []); - } -]; - -/* - * Optimalization passes made on the grammar AST before compilation. Each pass - * is a function that is passed the AST and returns a new AST. The AST can be - * modified in-place by the pass. The passes are run in sequence in order of - * their definition. - */ -PEG.Compiler.passes = [ - /* - * Removes proxy rules -- that is, rules that only delegate to other rule. - */ - function(ast) { - function isProxyRule(node) { - return node.type === "rule" && node.expression.type === "rule_ref"; - } - - function replaceRuleRefs(ast, from, to) { - function nop() {} - - function replaceInExpression(node, from, to) { - replace(node.expression, from, to); - } - - function replaceInSubnodes(propertyName) { - return function(node, from, to) { - PEG.ArrayUtils.each(node[propertyName], function(node) { - replace(node, from, to); - }); - }; - } - - var replaceFunctions = { - grammar: - function(node, from, to) { - for (var name in node.rules) { - replace(ast.rules[name], from, to); - } - }, - - rule: replaceInExpression, - choice: replaceInSubnodes("alternatives"), - sequence: replaceInSubnodes("elements"), - labeled: replaceInExpression, - simple_and: replaceInExpression, - simple_not: replaceInExpression, - semantic_and: nop, - semantic_not: nop, - optional: replaceInExpression, - zero_or_more: replaceInExpression, - one_or_more: replaceInExpression, - action: replaceInExpression, - - rule_ref: - function(node, from, to) { - if (node.name === from) { - node.name = to; - } - }, - - literal: nop, - any: nop, - "class": nop - }; - - function replace(node, from, to) { - replaceFunctions[node.type](node, from, to); - } - - replace(ast, from, to); - } - - for (var name in ast.rules) { - if (isProxyRule(ast.rules[name])) { - replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name); - if (name === ast.startRule) { - ast.startRule = ast.rules[name].expression.name; - } - delete ast.rules[name]; - } - } - - return ast; - } -]; + return emit(ast); +}; })();