You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

812 lines
27 KiB
JavaScript

/* Emits the generated code for the AST. */
PEG.compiler.emitter = function(ast) {
/*
* Takes parts of code, interpolates variables inside them and joins them with
* a newline.
*
* Variables are delimited with "${" and "}" and their names must be valid
* identifiers (i.e. they must match [a-zA-Z_][a-zA-Z0-9_]*). Variable values
* are specified as properties of the last parameter (if this is an object,
* otherwise empty variable set is assumed). Undefined variables result in
* throwing |Error|.
*
* There can be a filter specified after the variable name, prefixed with "|".
* The filter name must be a valid identifier. The only recognized filter
* right now is "string", which quotes the variable value as a JavaScript
* string. Unrecognized filters result in throwing |Error|.
*
* If any part has multiple lines and the first line is indented by some
* amount of whitespace (as defined by the /\s+/ JavaScript regular
* expression), second to last lines are indented by the same amount of
* whitespace. This results in nicely indented multiline code in variables
* without making the templates look ugly.
*
* Examples:
*
* formatCode("foo", "bar"); // "foo\nbar"
* formatCode("foo", "${bar}", { bar: "baz" }); // "foo\nbaz"
* formatCode("foo", "${bar}"); // throws Error
* formatCode("foo", "${bar|string}", { bar: "baz" }); // "foo\n\"baz\""
* formatCode("foo", "${bar|eeek}", { bar: "baz" }); // throws Error
* formatCode("foo", "${bar}", { bar: " baz\nqux" }); // "foo\n baz\n qux"
*/
function formatCode() {
function interpolateVariablesInParts(parts) {
return map(parts, function(part) {
return part.replace(
/\$\{([a-zA-Z_][a-zA-Z0-9_]*)(\|([a-zA-Z_][a-zA-Z0-9_]*))?\}/g,
function(match, name, dummy, filter) {
var value = vars[name];
if (value === undefined) {
throw new Error("Undefined variable: \"" + name + "\".");
}
if (filter !== undefined && filter !== "") { // JavaScript engines differ here.
if (filter === "string") {
return quote(value);
} else {
throw new Error("Unrecognized filter: \"" + filter + "\".");
}
} else {
return value;
}
}
);
});
}
function indentMultilineParts(parts) {
return map(parts, function(part) {
if (!/\n/.test(part)) { return part; }
var firstLineWhitespacePrefix = part.match(/^\s*/)[0];
var lines = part.split("\n");
var linesIndented = [lines[0]].concat(
map(lines.slice(1), function(line) {
return firstLineWhitespacePrefix + line;
})
);
return linesIndented.join("\n");
});
}
var args = Array.prototype.slice.call(arguments);
var vars = args[args.length - 1] instanceof Object ? args.pop() : {};
return indentMultilineParts(interpolateVariablesInParts(args)).join("\n");
}
/* Returns the name of the generated result variable with the given index. */
function resultVar(index) {
  var prefix = "result";
  return prefix + index;
}
/* Returns the name of the generated position variable with the given index. */
function posVar(index) {
  var prefix = "pos";
  return prefix + index;
}
var emit = buildNodeVisitor({
grammar: function(node) {
var initializerCode = node.initializer !== null
? emit(node.initializer)
: "";
var name;
var parseFunctionTableItems = [];
for (name in node.rules) {
parseFunctionTableItems.push(quote(name) + ": parse_" + name);
}
parseFunctionTableItems.sort();
var parseFunctionDefinitions = [];
for (name in node.rules) {
parseFunctionDefinitions.push(emit(node.rules[name]));
}
return formatCode(
'(function(){',
' /* Generated by PEG.js @VERSION (http://pegjs.majda.cz/). */',
' ',
' var result = {',
' /*',
' * Parses the input with a generated parser. If the parsing is successfull,',
' * returns a value explicitly or implicitly specified by the grammar from',
' * which the parser was generated (see |PEG.buildParser|). If the parsing is',
' * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.',
' */',
' parse: function(input, startRule) {',
' var parseFunctions = {',
' ${parseFunctionTableItems}',
' };',
' ',
' if (startRule !== undefined) {',
' if (parseFunctions[startRule] === undefined) {',
' throw new Error("Invalid rule name: " + quote(startRule) + ".");',
' }',
' } else {',
' startRule = ${startRule|string};',
' }',
' ',
' var pos = 0;',
' var reportFailures = 0;', // 0 = report, anything > 0 = do not report
' var rightmostFailuresPos = 0;',
' var rightmostFailuresExpected = [];',
' var cache = {};',
' ',
/* This needs to be in sync with |padLeft| in utils.js. */
' function padLeft(input, padding, length) {',
' var result = input;',
' ',
' var padLength = length - input.length;',
' for (var i = 0; i < padLength; i++) {',
' result = padding + result;',
' }',
' ',
' return result;',
' }',
' ',
/* This needs to be in sync with |escape| in utils.js. */
' function escape(ch) {',
' var charCode = ch.charCodeAt(0);',
' var escapeChar;',
' var length;',
' ',
' if (charCode <= 0xFF) {',
' escapeChar = \'x\';',
' length = 2;',
' } else {',
' escapeChar = \'u\';',
' length = 4;',
' }',
' ',
' return \'\\\\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), \'0\', length);',
' }',
' ',
/* This needs to be in sync with |quote| in utils.js. */
' function quote(s) {',
' /*',
' * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a',
' * string literal except for the closing quote character, backslash,',
' * carriage return, line separator, paragraph separator, and line feed.',
' * Any character may appear in the form of an escape sequence.',
' *',
' * For portability, we also escape escape all control and non-ASCII',
' * characters. Note that "\\0" and "\\v" escape sequences are not used',
' * because JSHint does not like the first and IE the second.',
' */',
' return \'"\' + s',
' .replace(/\\\\/g, \'\\\\\\\\\') // backslash',
' .replace(/"/g, \'\\\\"\') // closing quote character',
' .replace(/\\x08/g, \'\\\\b\') // backspace',
' .replace(/\\t/g, \'\\\\t\') // horizontal tab',
' .replace(/\\n/g, \'\\\\n\') // line feed',
' .replace(/\\f/g, \'\\\\f\') // form feed',
' .replace(/\\r/g, \'\\\\r\') // carriage return',
' .replace(/[\\x00-\\x07\\x0B\\x0E-\\x1F\\x80-\\uFFFF]/g, escape)',
' + \'"\';',
' }',
' ',
' function matchFailed(failure) {',
' if (pos < rightmostFailuresPos) {',
' return;',
' }',
' ',
' if (pos > rightmostFailuresPos) {',
' rightmostFailuresPos = pos;',
' rightmostFailuresExpected = [];',
' }',
' ',
' rightmostFailuresExpected.push(failure);',
' }',
' ',
' ${parseFunctionDefinitions}',
' ',
' function buildErrorMessage() {',
' function buildExpected(failuresExpected) {',
' failuresExpected.sort();',
' ',
' var lastFailure = null;',
' var failuresExpectedUnique = [];',
' for (var i = 0; i < failuresExpected.length; i++) {',
' if (failuresExpected[i] !== lastFailure) {',
' failuresExpectedUnique.push(failuresExpected[i]);',
' lastFailure = failuresExpected[i];',
' }',
' }',
' ',
' switch (failuresExpectedUnique.length) {',
' case 0:',
' return "end of input";',
' case 1:',
' return failuresExpectedUnique[0];',
' default:',
' return failuresExpectedUnique.slice(0, failuresExpectedUnique.length - 1).join(", ")',
' + " or "',
' + failuresExpectedUnique[failuresExpectedUnique.length - 1];',
' }',
' }',
' ',
' var expected = buildExpected(rightmostFailuresExpected);',
' var actualPos = Math.max(pos, rightmostFailuresPos);',
' var actual = actualPos < input.length',
' ? quote(input.charAt(actualPos))',
' : "end of input";',
' ',
' return "Expected " + expected + " but " + actual + " found.";',
' }',
' ',
' function computeErrorPosition() {',
' /*',
' * The first idea was to use |String.split| to break the input up to the',
' * error position along newlines and derive the line and column from',
' * there. However IE\'s |split| implementation is so broken that it was',
' * enough to prevent it.',
' */',
' ',
' var line = 1;',
' var column = 1;',
' var seenCR = false;',
' ',
' for (var i = 0; i < rightmostFailuresPos; i++) {',
' var ch = input.charAt(i);',
' if (ch === "\\n") {',
' if (!seenCR) { line++; }',
' column = 1;',
' seenCR = false;',
' } else if (ch === "\\r" || ch === "\\u2028" || ch === "\\u2029") {',
' line++;',
' column = 1;',
' seenCR = true;',
' } else {',
' column++;',
' seenCR = false;',
' }',
' }',
' ',
' return { line: line, column: column };',
' }',
' ',
' ${initializerCode}',
' ',
' var result = parseFunctions[startRule]();',
' ',
' /*',
' * The parser is now in one of the following three states:',
' *',
' * 1. The parser successfully parsed the whole input.',
' *',
' * - |result !== null|',
' * - |pos === input.length|',
' * - |rightmostFailuresExpected| may or may not contain something',
' *',
' * 2. The parser successfully parsed only a part of the input.',
' *',
' * - |result !== null|',
' * - |pos < input.length|',
' * - |rightmostFailuresExpected| may or may not contain something',
' *',
' * 3. The parser did not successfully parse any part of the input.',
' *',
' * - |result === null|',
' * - |pos === 0|',
' * - |rightmostFailuresExpected| contains at least one failure',
' *',
' * All code following this comment (including called functions) must',
' * handle these states.',
' */',
' if (result === null || pos !== input.length) {',
' var errorPosition = computeErrorPosition();',
' throw new this.SyntaxError(',
' buildErrorMessage(),',
' errorPosition.line,',
' errorPosition.column',
' );',
' }',
' ',
' return result;',
' },',
' ',
' /* Returns the parser source code. */',
' toSource: function() { return this._source; }',
' };',
' ',
' /* Thrown when a parser encounters a syntax error. */',
' ',
' result.SyntaxError = function(message, line, column) {',
' this.name = "SyntaxError";',
' this.message = message;',
' this.line = line;',
' this.column = column;',
' };',
' ',
' result.SyntaxError.prototype = Error.prototype;',
' ',
' return result;',
'})()',
{
initializerCode: initializerCode,
parseFunctionTableItems: parseFunctionTableItems.join(',\n'),
parseFunctionDefinitions: parseFunctionDefinitions.join('\n\n'),
startRule: node.startRule
}
);
},
initializer: function(node) {
return node.code;
},
rule: function(node) {
var context = {
resultIndex: 0,
posIndex: 0
};
var resultVars = map(range(node.resultStackDepth), resultVar);
var posVars = map(range(node.posStackDepth), posVar);
var resultVarsCode = resultVars.length > 0 ? 'var ' + resultVars.join(', ') + ';' : '';
var posVarsCode = posVars.length > 0 ? 'var ' + posVars.join(', ') + ';' : '';
var setReportFailuresCode;
var restoreReportFailuresCode;
var reportFailureCode;
if (node.displayName !== null) {
setReportFailuresCode = formatCode(
'reportFailures++;'
);
restoreReportFailuresCode = formatCode(
'reportFailures--;'
);
reportFailureCode = formatCode(
'if (reportFailures === 0 && ${resultVar} === null) {',
' matchFailed(${displayName|string});',
'}',
{
displayName: node.displayName,
resultVar: resultVar(context.resultIndex)
}
);
} else {
setReportFailuresCode = "";
restoreReportFailuresCode = "";
reportFailureCode = "";
}
return formatCode(
'function parse_${name}() {',
' var cacheKey = "${name}@" + pos;',
' var cachedResult = cache[cacheKey];',
' if (cachedResult) {',
' pos = cachedResult.nextPos;',
' return cachedResult.result;',
' }',
' ',
' ${resultVarsCode}',
' ${posVarsCode}',
' ',
' ${setReportFailuresCode}',
' ${code}',
' ${restoreReportFailuresCode}',
' ${reportFailureCode}',
' ',
' cache[cacheKey] = {',
' nextPos: pos,',
' result: ${resultVar}',
' };',
' return ${resultVar};',
'}',
{
name: node.name,
resultVarsCode: resultVarsCode,
posVarsCode: posVarsCode,
setReportFailuresCode: setReportFailuresCode,
restoreReportFailuresCode: restoreReportFailuresCode,
reportFailureCode: reportFailureCode,
code: emit(node.expression, context),
resultVar: resultVar(context.resultIndex)
}
);
},
/*
* The contract for all code fragments generated by the following functions
* is as follows.
*
* The code fragment tries to match a part of the input starting with the
* position indicated in |pos|. That position may point past the end of the
* input.
*
* * If the code fragment matches the input, it advances |pos| to point to
* the first character following the matched part of the input and sets a
* variable with a name computed by calling
* |resultVar(context.resultIndex)| to an appropriate value. This value is
* always non-|null|.
*
* * If the code fragment does not match the input, it returns with |pos|
* set to the original value and it sets a variable with a name computed
* by calling |resultVar(context.resultIndex)| to |null|.
*
* The code can use variables with names computed by calling
*
* |resultVar(context.resultIndex + i)|
*
* and
*
* |posVar(context.posIndex + i)|
*
* where |i| >= 1 to store necessary data (return values and positions). It
* won't use any other variables.
*/
choice: function(node, context) {
var code, nextAlternativesCode;
for (var i = node.alternatives.length - 1; i >= 0; i--) {
nextAlternativesCode = i !== node.alternatives.length - 1
? formatCode(
'if (${resultVar} === null) {',
' ${code}',
'}',
{
code: code,
resultVar: resultVar(context.resultIndex)
}
)
: '';
code = formatCode(
'${currentAlternativeCode}',
'${nextAlternativesCode}',
{
currentAlternativeCode: emit(node.alternatives[i], context),
nextAlternativesCode: nextAlternativesCode
}
);
}
return code;
},
sequence: function(node, context) {
var elementResultVars = map(node.elements, function(element, i) {
return resultVar(context.resultIndex + i);
});
var code = formatCode(
'${resultVar} = ${elementResultVarArray};',
{
resultVar: resultVar(context.resultIndex),
elementResultVarArray: '[' + elementResultVars.join(', ') + ']'
}
);
var elementContext;
for (var i = node.elements.length - 1; i >= 0; i--) {
elementContext = {
resultIndex: context.resultIndex + i,
posIndex: context.posIndex + 1
};
code = formatCode(
'${elementCode}',
'if (${elementResultVar} !== null) {',
' ${code}',
'} else {',
' ${resultVar} = null;',
' pos = ${posVar};',
'}',
{
elementCode: emit(node.elements[i], elementContext),
elementResultVar: elementResultVars[i],
code: code,
posVar: posVar(context.posIndex),
resultVar: resultVar(context.resultIndex)
}
);
}
return formatCode(
'${posVar} = pos;',
'${code}',
{
code: code,
posVar: posVar(context.posIndex)
}
);
},
labeled: function(node, context) {
return emit(node.expression, context);
},
simple_and: function(node, context) {
var expressionContext = {
resultIndex: context.resultIndex,
posIndex: context.posIndex + 1
};
return formatCode(
'${posVar} = pos;',
'reportFailures++;',
'${expressionCode}',
'reportFailures--;',
'if (${resultVar} !== null) {',
' ${resultVar} = "";',
' pos = ${posVar};',
'} else {',
' ${resultVar} = null;',
'}',
{
expressionCode: emit(node.expression, expressionContext),
posVar: posVar(context.posIndex),
resultVar: resultVar(context.resultIndex)
}
);
},
simple_not: function(node, context) {
var expressionContext = {
resultIndex: context.resultIndex,
posIndex: context.posIndex + 1
};
return formatCode(
'${posVar} = pos;',
'reportFailures++;',
'${expressionCode}',
'reportFailures--;',
'if (${resultVar} === null) {',
' ${resultVar} = "";',
'} else {',
' ${resultVar} = null;',
' pos = ${posVar};',
'}',
{
expressionCode: emit(node.expression, expressionContext),
posVar: posVar(context.posIndex),
resultVar: resultVar(context.resultIndex)
}
);
},
semantic_and: function(node, context) {
return formatCode(
'${resultVar} = (function() {${actionCode}})() ? "" : null;',
{
actionCode: node.code,
resultVar: resultVar(context.resultIndex)
}
);
},
semantic_not: function(node, context) {
return formatCode(
'${resultVar} = (function() {${actionCode}})() ? null : "";',
{
actionCode: node.code,
resultVar: resultVar(context.resultIndex)
}
);
},
optional: function(node, context) {
return formatCode(
'${expressionCode}',
'${resultVar} = ${resultVar} !== null ? ${resultVar} : "";',
{
expressionCode: emit(node.expression, context),
resultVar: resultVar(context.resultIndex)
}
);
},
zero_or_more: function(node, context) {
var expressionContext = {
resultIndex: context.resultIndex + 1,
posIndex: context.posIndex
};
return formatCode(
'${resultVar} = [];',
'${expressionCode}',
'while (${expressionResultVar} !== null) {',
' ${resultVar}.push(${expressionResultVar});',
' ${expressionCode}',
'}',
{
expressionCode: emit(node.expression, expressionContext),
expressionResultVar: resultVar(context.resultIndex + 1),
resultVar: resultVar(context.resultIndex)
}
);
},
one_or_more: function(node, context) {
var expressionContext = {
resultIndex: context.resultIndex + 1,
posIndex: context.posIndex
};
return formatCode(
'${expressionCode}',
'if (${expressionResultVar} !== null) {',
' ${resultVar} = [];',
' while (${expressionResultVar} !== null) {',
' ${resultVar}.push(${expressionResultVar});',
' ${expressionCode}',
' }',
'} else {',
' ${resultVar} = null;',
'}',
{
expressionCode: emit(node.expression, expressionContext),
expressionResultVar: resultVar(context.resultIndex + 1),
resultVar: resultVar(context.resultIndex)
}
);
},
action: function(node, context) {
/*
* In case of sequences, we splat their elements into function arguments
* one by one. Example:
*
* start: a:"a" b:"b" c:"c" { alert(arguments.length) } // => 3
*
* This behavior is reflected in this function.
*/
var expressionContext = {
resultIndex: context.resultIndex,
posIndex: context.posIndex + 1
};
var formalParams;
var actualParams;
if (node.expression.type === "sequence") {
formalParams = [];
actualParams = [];
var elements = node.expression.elements;
var elementsLength = elements.length;
for (var i = 0; i < elementsLength; i++) {
if (elements[i].type === "labeled") {
formalParams.push(elements[i].label);
actualParams.push(resultVar(context.resultIndex) + '[' + i + ']');
}
}
} else if (node.expression.type === "labeled") {
formalParams = [node.expression.label];
actualParams = [resultVar(context.resultIndex)];
} else {
formalParams = [];
actualParams = [];
}
return formatCode(
'${posVar} = pos;',
'${expressionCode}',
'if (${resultVar} !== null) {',
' ${resultVar} = (function(${formalParams}) {${actionCode}})(${actualParams});',
'}',
'if (${resultVar} === null) {',
' pos = ${posVar};',
'}',
{
expressionCode: emit(node.expression, expressionContext),
actionCode: node.code,
formalParams: formalParams.join(', '),
actualParams: actualParams.join(', '),
posVar: posVar(context.posIndex),
resultVar: resultVar(context.resultIndex)
}
);
},
rule_ref: function(node, context) {
return formatCode(
'${resultVar} = ${ruleMethod}();',
{
ruleMethod: 'parse_' + node.name,
resultVar: resultVar(context.resultIndex)
}
);
},
literal: function(node, context) {
var length = node.value.length;
if (length === 0) {
return formatCode(
'${resultVar} = "";',
{ resultVar: resultVar(context.resultIndex) }
);
}
var testCode = length === 1
? formatCode(
'input.charCodeAt(pos) === ${valueCharCode}',
{ valueCharCode: node.value.charCodeAt(0) }
)
: formatCode(
'input.substr(pos, ${length}) === ${value|string}',
{
value: node.value,
length: length
}
);
return formatCode(
'if (${testCode}) {',
' ${resultVar} = ${value|string};',
' pos += ${length};',
'} else {',
' ${resultVar} = null;',
' if (reportFailures === 0) {',
' matchFailed(${valueQuoted|string});',
' }',
'}',
{
testCode: testCode,
value: node.value,
valueQuoted: quote(node.value),
length: length,
resultVar: resultVar(context.resultIndex)
}
);
},
any: function(node, context) {
return formatCode(
'if (input.length > pos) {',
' ${resultVar} = input.charAt(pos);',
' pos++;',
'} else {',
' ${resultVar} = null;',
' if (reportFailures === 0) {',
' matchFailed("any character");',
' }',
'}',
{ resultVar: resultVar(context.resultIndex) }
);
},
"class": function(node, context) {
var regexp;
if (node.parts.length > 0) {
regexp = '/^['
+ (node.inverted ? '^' : '')
+ map(node.parts, function(part) {
return part instanceof Array
? quoteForRegexpClass(part[0])
+ '-'
+ quoteForRegexpClass(part[1])
: quoteForRegexpClass(part);
}).join('')
+ ']/';
} else {
/*
* Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so
* we translate them into euqivalents it can handle.
*/
regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
}
return formatCode(
'if (${regexp}.test(input.charAt(pos))) {',
' ${resultVar} = input.charAt(pos);',
' pos++;',
'} else {',
' ${resultVar} = null;',
' if (reportFailures === 0) {',
' matchFailed(${rawText|string});',
' }',
'}',
{
regexp: regexp,
rawText: node.rawText,
resultVar: resultVar(context.resultIndex)
}
);
}
});
return emit(ast);
};