Move emitter utility functions out of |PEG.Compiler|

redux
David Majda 14 years ago
parent e5a5572a87
commit 1682a25b0d

@ -130,116 +130,6 @@ PEG.RegExpUtils = {
/* ===== PEG.Compiler ===== */
PEG.Compiler = {
/*
* Takes parts of code, interpolates variables inside them and joins them with
* a newline.
*
* Variables are delimited with "${" and "}" and their names must be valid
* identifiers (i.e. they must match [a-zA-Z_][a-zA-Z0-9_]*). Variable values
* are specified as properties of the last parameter (if this is an object,
* otherwise empty variable set is assumed). Undefined variables result in
* throwing |Error|.
*
* There can be a filter specified after the variable name, prefixed with "|".
* The filter name must be a valid identifier. The only recognized filter
* right now is "string", which quotes the variable value as a JavaScript
* string. Unrecognized filters result in throwing |Error|.
*
* If any part has multiple lines and the first line is indented by some
* amount of whitespace (as defined by the /\s+/ JavaScript regular
* expression), second to last lines are indented by the same amount of
* whitespace. This results in nicely indented multiline code in variables
* without making the templates look ugly.
*
* Examples:
*
* PEG.Compiler.formatCode("foo", "bar"); // "foo\nbar"
* PEG.Compiler.formatCode(
* "foo", "${bar}",
* { bar: "baz" }
* ); // "foo\nbaz"
* PEG.Compiler.formatCode("foo", "${bar}"); // throws Error
* PEG.Compiler.formatCode(
* "foo", "${bar|string}",
* { bar: "baz" }
* ); // "foo\n\"baz\""
* PEG.Compiler.formatCode(
* "foo", "${bar|eeek}",
* { bar: "baz" }
* ); // throws Error
* PEG.Compiler.formatCode(
* "foo", "${bar}",
* { bar: " baz\nqux" }
* ); // "foo\n baz\n qux"
*/
formatCode: function() {
function interpolateVariablesInParts(parts) {
return PEG.ArrayUtils.map(parts, function(part) {
return part.replace(
/\$\{([a-zA-Z_][a-zA-Z0-9_]*)(\|([a-zA-Z_][a-zA-Z0-9_]*))?\}/g,
function(match, name, dummy, filter) {
var value = vars[name];
if (value === undefined) {
throw new Error("Undefined variable: \"" + name + "\".");
}
if (filter !== undefined && filter != "") { // JavaScript engines differ here.
if (filter === "string") {
return PEG.StringUtils.quote(value);
} else {
throw new Error("Unrecognized filter: \"" + filter + "\".");
}
} else {
return value;
}
}
);
});
}
function indentMultilineParts(parts) {
return PEG.ArrayUtils.map(parts, function(part) {
if (!/\n/.test(part)) { return part; }
var firstLineWhitespacePrefix = part.match(/^\s*/)[0];
var lines = part.split("\n");
var linesIndented = [lines[0]].concat(
PEG.ArrayUtils.map(lines.slice(1), function(line) {
return firstLineWhitespacePrefix + line;
})
);
return linesIndented.join("\n");
});
}
var args = Array.prototype.slice.call(arguments);
var vars = args[args.length - 1] instanceof Object ? args.pop() : {};
return indentMultilineParts(interpolateVariablesInParts(args)).join("\n");
},
_uniqueIdentifierCounters: {},
/*
* Generates a unique identifier with specified prefix. The sequence of
* generated identifiers with given prefix is repeatable and will be the same
* within different language runtimes.
*/
generateUniqueIdentifier: function(prefix) {
this._uniqueIdentifierCounters[prefix]
= this._uniqueIdentifierCounters[prefix] || 0;
return prefix + this._uniqueIdentifierCounters[prefix]++;
},
/*
* Resets internal counters of the unique identifier generator. The sequence
* of identifiers with given prefix generated by |generateUniqueIdentifier|
* will start from the beginning.
*/
resetUniqueIdentifierCounters: function() {
this._uniqueIdentifierCounters = {};
},
/*
* Generates a parser from a specified grammar AST. Throws |PEG.GrammarError|
* if the AST contains a semantic error. Note that not all errors are detected
@ -477,6 +367,96 @@ PEG.Compiler.passes = [
/* Emits the generated code for the AST. */
PEG.Compiler.emitter = function(ast) {
/*
* Takes parts of code, interpolates variables inside them and joins them with
* a newline.
*
* Variables are delimited with "${" and "}" and their names must be valid
* identifiers (i.e. they must match [a-zA-Z_][a-zA-Z0-9_]*). Variable values
* are specified as properties of the last parameter (if this is an object,
* otherwise empty variable set is assumed). Undefined variables result in
* throwing |Error|.
*
* There can be a filter specified after the variable name, prefixed with "|".
* The filter name must be a valid identifier. The only recognized filter
* right now is "string", which quotes the variable value as a JavaScript
* string. Unrecognized filters result in throwing |Error|.
*
* If any part has multiple lines and the first line is indented by some
* amount of whitespace (as defined by the /\s+/ JavaScript regular
* expression), second to last lines are indented by the same amount of
* whitespace. This results in nicely indented multiline code in variables
* without making the templates look ugly.
*
* Examples:
*
* formatCode("foo", "bar"); // "foo\nbar"
* formatCode("foo", "${bar}", { bar: "baz" }); // "foo\nbaz"
* formatCode("foo", "${bar}"); // throws Error
* formatCode("foo", "${bar|string}", { bar: "baz" }); // "foo\n\"baz\""
* formatCode("foo", "${bar|eeek}", { bar: "baz" }); // throws Error
* formatCode("foo", "${bar}", { bar: " baz\nqux" }); // "foo\n baz\n qux"
*/
function formatCode() {
function interpolateVariablesInParts(parts) {
return PEG.ArrayUtils.map(parts, function(part) {
return part.replace(
/\$\{([a-zA-Z_][a-zA-Z0-9_]*)(\|([a-zA-Z_][a-zA-Z0-9_]*))?\}/g,
function(match, name, dummy, filter) {
var value = vars[name];
if (value === undefined) {
throw new Error("Undefined variable: \"" + name + "\".");
}
if (filter !== undefined && filter != "") { // JavaScript engines differ here.
if (filter === "string") {
return PEG.StringUtils.quote(value);
} else {
throw new Error("Unrecognized filter: \"" + filter + "\".");
}
} else {
return value;
}
}
);
});
}
function indentMultilineParts(parts) {
return PEG.ArrayUtils.map(parts, function(part) {
if (!/\n/.test(part)) { return part; }
var firstLineWhitespacePrefix = part.match(/^\s*/)[0];
var lines = part.split("\n");
var linesIndented = [lines[0]].concat(
PEG.ArrayUtils.map(lines.slice(1), function(line) {
return firstLineWhitespacePrefix + line;
})
);
return linesIndented.join("\n");
});
}
var args = Array.prototype.slice.call(arguments);
var vars = args[args.length - 1] instanceof Object ? args.pop() : {};
return indentMultilineParts(interpolateVariablesInParts(args)).join("\n");
};
/*
 * Unique ID generator.
 *
 * |next| returns the given prefix followed by a counter that starts at 0 and
 * is maintained separately for every prefix; |reset| discards all counters so
 * that every sequence starts from the beginning again.
 */
var UID = {
  /*
   * Next number to hand out for each prefix. Entries are stored under the key
   * "$" + prefix so that a prefix can never collide with a property inherited
   * from |Object.prototype| (e.g. "constructor" or "__proto__").
   */
  _counters: {},

  next: function(prefix) {
    var key = "$" + prefix;
    var count = this._counters[key] || 0;

    this._counters[key] = count + 1;

    return prefix + count;
  },

  reset: function() {
    this._counters = {};
  }
};
var emitFunctions = {
grammar: function(node) {
var initializerCode = node.initializer !== null
@ -488,7 +468,7 @@ PEG.Compiler.emitter = function(ast) {
parseFunctionDefinitions.push(emit(node.rules[name]));
}
return PEG.Compiler.formatCode(
return formatCode(
"(function(){",
" /* Generated by PEG.js (http://pegjs.majda.cz/). */",
" ",
@ -687,19 +667,19 @@ PEG.Compiler.emitter = function(ast) {
* generated grammar stored in a VCS (this is true e.g. for our
* metagrammar).
*/
PEG.Compiler.resetUniqueIdentifierCounters();
UID.reset();
var resultVar = PEG.Compiler.generateUniqueIdentifier("result");
var resultVar = UID.next("result");
if (node.displayName !== null) {
var setReportMatchFailuresCode = PEG.Compiler.formatCode(
var setReportMatchFailuresCode = formatCode(
"var savedReportMatchFailures = context.reportMatchFailures;",
"context.reportMatchFailures = false;"
);
var restoreReportMatchFailuresCode = PEG.Compiler.formatCode(
var restoreReportMatchFailuresCode = formatCode(
"context.reportMatchFailures = savedReportMatchFailures;"
);
var reportMatchFailureCode = PEG.Compiler.formatCode(
var reportMatchFailureCode = formatCode(
"if (context.reportMatchFailures && ${resultVar} === null) {",
" matchFailed(${displayName|string});",
"}",
@ -714,7 +694,7 @@ PEG.Compiler.emitter = function(ast) {
var reportMatchFailureCode = "";
}
return PEG.Compiler.formatCode(
return formatCode(
"function parse_${name}(context) {",
" var cacheKey = ${name|string} + '@' + pos;",
" var cachedResult = cache[cacheKey];",
@ -762,14 +742,14 @@ PEG.Compiler.emitter = function(ast) {
*/
choice: function(node, resultVar) {
var code = PEG.Compiler.formatCode(
var code = formatCode(
"var ${resultVar} = null;",
{ resultVar: resultVar }
);
for (var i = node.alternatives.length - 1; i >= 0; i--) {
var alternativeResultVar = PEG.Compiler.generateUniqueIdentifier("result");
code = PEG.Compiler.formatCode(
var alternativeResultVar = UID.next("result");
code = formatCode(
"${alternativeCode}",
"if (${alternativeResultVar} !== null) {",
" var ${resultVar} = ${alternativeResultVar};",
@ -789,13 +769,13 @@ PEG.Compiler.emitter = function(ast) {
},
sequence: function(node, resultVar) {
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
var savedPosVar = UID.next("savedPos");
var elementResultVars = PEG.ArrayUtils.map(node.elements, function() {
return PEG.Compiler.generateUniqueIdentifier("result")
return UID.next("result")
});
var code = PEG.Compiler.formatCode(
var code = formatCode(
"var ${resultVar} = ${elementResultVarArray};",
{
resultVar: resultVar,
@ -804,7 +784,7 @@ PEG.Compiler.emitter = function(ast) {
);
for (var i = node.elements.length - 1; i >= 0; i--) {
code = PEG.Compiler.formatCode(
code = formatCode(
"${elementCode}",
"if (${elementResultVar} !== null) {",
" ${code}",
@ -822,7 +802,7 @@ PEG.Compiler.emitter = function(ast) {
);
}
return PEG.Compiler.formatCode(
return formatCode(
"var ${savedPosVar} = pos;",
"${code}",
{
@ -837,11 +817,11 @@ PEG.Compiler.emitter = function(ast) {
},
simple_and: function(node, resultVar) {
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
var savedReportMatchFailuresVar = PEG.Compiler.generateUniqueIdentifier("savedReportMatchFailuresVar");
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
var savedPosVar = UID.next("savedPos");
var savedReportMatchFailuresVar = UID.next("savedReportMatchFailuresVar");
var expressionResultVar = UID.next("result");
return PEG.Compiler.formatCode(
return formatCode(
"var ${savedPosVar} = pos;",
"var ${savedReportMatchFailuresVar} = context.reportMatchFailures;",
"context.reportMatchFailures = false;",
@ -864,11 +844,11 @@ PEG.Compiler.emitter = function(ast) {
},
simple_not: function(node, resultVar) {
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
var savedReportMatchFailuresVar = PEG.Compiler.generateUniqueIdentifier("savedReportMatchFailuresVar");
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
var savedPosVar = UID.next("savedPos");
var savedReportMatchFailuresVar = UID.next("savedReportMatchFailuresVar");
var expressionResultVar = UID.next("result");
return PEG.Compiler.formatCode(
return formatCode(
"var ${savedPosVar} = pos;",
"var ${savedReportMatchFailuresVar} = context.reportMatchFailures;",
"context.reportMatchFailures = false;",
@ -891,9 +871,9 @@ PEG.Compiler.emitter = function(ast) {
},
semantic_and: function(node, resultVar) {
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
var savedPosVar = UID.next("savedPos");
return PEG.Compiler.formatCode(
return formatCode(
"var ${resultVar} = (function() {${actionCode}})() ? '' : null;",
{
actionCode: node.code,
@ -903,9 +883,9 @@ PEG.Compiler.emitter = function(ast) {
},
semantic_not: function(node, resultVar) {
var savedPosVar = PEG.Compiler.generateUniqueIdentifier("savedPos");
var savedPosVar = UID.next("savedPos");
return PEG.Compiler.formatCode(
return formatCode(
"var ${resultVar} = (function() {${actionCode}})() ? null : '';",
{
actionCode: node.code,
@ -915,9 +895,9 @@ PEG.Compiler.emitter = function(ast) {
},
optional: function(node, resultVar) {
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
var expressionResultVar = UID.next("result");
return PEG.Compiler.formatCode(
return formatCode(
"${expressionCode}",
"var ${resultVar} = ${expressionResultVar} !== null ? ${expressionResultVar} : '';",
{
@ -929,9 +909,9 @@ PEG.Compiler.emitter = function(ast) {
},
zero_or_more: function(node, resultVar) {
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
var expressionResultVar = UID.next("result");
return PEG.Compiler.formatCode(
return formatCode(
"var ${resultVar} = [];",
"${expressionCode}",
"while (${expressionResultVar} !== null) {",
@ -947,9 +927,9 @@ PEG.Compiler.emitter = function(ast) {
},
one_or_more: function(node, resultVar) {
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
var expressionResultVar = UID.next("result");
return PEG.Compiler.formatCode(
return formatCode(
"${expressionCode}",
"if (${expressionResultVar} !== null) {",
" var ${resultVar} = [];",
@ -978,7 +958,7 @@ PEG.Compiler.emitter = function(ast) {
* This behavior is reflected in this function.
*/
var expressionResultVar = PEG.Compiler.generateUniqueIdentifier("result");
var expressionResultVar = UID.next("result");
if (node.expression.type === "sequence") {
var formalParams = [];
@ -1000,7 +980,7 @@ PEG.Compiler.emitter = function(ast) {
var actualParams = [];
}
return PEG.Compiler.formatCode(
return formatCode(
"${expressionCode}",
"var ${resultVar} = ${expressionResultVar} !== null",
" ? (function(${formalParams}) {${actionCode}})(${actualParams})",
@ -1017,7 +997,7 @@ PEG.Compiler.emitter = function(ast) {
},
rule_ref: function(node, resultVar) {
return PEG.Compiler.formatCode(
return formatCode(
"var ${resultVar} = ${ruleMethod}(context);",
{
ruleMethod: "parse_" + node.name,
@ -1027,7 +1007,7 @@ PEG.Compiler.emitter = function(ast) {
},
literal: function(node, resultVar) {
return PEG.Compiler.formatCode(
return formatCode(
"if (input.substr(pos, ${length}) === ${value|string}) {",
" var ${resultVar} = ${value|string};",
" pos += ${length};",
@ -1046,7 +1026,7 @@ PEG.Compiler.emitter = function(ast) {
},
any: function(node, resultVar) {
return PEG.Compiler.formatCode(
return formatCode(
"if (input.length > pos) {",
" var ${resultVar} = input.charAt(pos);",
" pos++;",
@ -1080,7 +1060,7 @@ PEG.Compiler.emitter = function(ast) {
var regexp = node.inverted ? "/^[\\S\\s]/" : "/^(?!)/";
}
return PEG.Compiler.formatCode(
return formatCode(
"if (input.substr(pos).match(${regexp}) !== null) {",
" var ${resultVar} = input.charAt(pos);",
" pos++;",

@ -117,68 +117,6 @@ test("quoteForClass", function() {
);
});
/* ===== PEG.Compiler ===== */

module("PEG.Compiler");

/* Parts without any variables are simply joined by a newline. */
test("formatCode joins parts", function() {
  var code = PEG.Compiler.formatCode("foo", "bar");

  strictEqual(code, "foo\nbar");
});

/* Known variables are substituted, unknown ones raise an error. */
test("formatCode interpolates variables", function() {
  var code = PEG.Compiler.formatCode("foo", "${bar}", { bar: "baz" });

  function formatWithUndefinedVariable() {
    PEG.Compiler.formatCode("foo", "${bar}");
  }

  strictEqual(code, "foo\nbaz");
  throws(
    formatWithUndefinedVariable,
    Error,
    { message: "Undefined variable: \"bar\"." }
  );
});

/* The "string" filter quotes the value, any other filter raises an error. */
test("formatCode filters variables", function() {
  var code = PEG.Compiler.formatCode("foo", "${bar|string}", { bar: "baz" });

  function formatWithUnknownFilter() {
    PEG.Compiler.formatCode("foo", "${bar|eeek}", { bar: "baz" });
  }

  strictEqual(code, "foo\n\"baz\"");
  throws(
    formatWithUnknownFilter,
    Error,
    { message: "Unrecognized filter: \"eeek\"." }
  );
});

/* Continuation lines receive the indentation of the first line. */
test("formatCode indents multiline parts", function() {
  var code = PEG.Compiler.formatCode("foo", "${bar}", { bar: " baz\nqux" });

  strictEqual(code, "foo\n baz\n qux");
});

/* Two consecutive identifiers with the same prefix must differ. */
test("generateUniqueIdentifier", function() {
  var first = PEG.Compiler.generateUniqueIdentifier("prefix");
  var second = PEG.Compiler.generateUniqueIdentifier("prefix");

  notStrictEqual(first, second);
});

/* After a reset every prefix replays the same sequence of identifiers. */
test("resetUniqueIdentifierCounters", function() {
  var prefixes = ["a", "a", "b", "b"];
  var beforeReset = [];
  var i;

  for (i = 0; i < prefixes.length; i++) {
    beforeReset.push(PEG.Compiler.generateUniqueIdentifier(prefixes[i]));
  }

  PEG.Compiler.resetUniqueIdentifierCounters();

  for (i = 0; i < prefixes.length; i++) {
    strictEqual(
      PEG.Compiler.generateUniqueIdentifier(prefixes[i]),
      beforeReset[i]
    );
  }
});
/* ===== PEG ===== */
module("PEG");

Loading…
Cancel
Save