/*
 * Emits the generated code for the AST.
 *
 * Takes the root AST node, dispatches on node types through a visitor built
 * with |buildNodeVisitor| and returns the source code of the generated parser
 * as a string.
 */
PEG.compiler.emitter = function(ast) {
  /*
   * Takes parts of code, interpolates variables inside them and joins them with
   * a newline.
   *
   * Variables are delimited with "${" and "}" and their names must be valid
   * identifiers (i.e. they must match [a-zA-Z_][a-zA-Z0-9_]*). Variable values
   * are specified as properties of the last parameter (if this is an object,
   * otherwise empty variable set is assumed). Undefined variables result in
   * throwing |Error|.
   *
   * There can be a filter specified after the variable name, prefixed with "|".
   * The filter name must be a valid identifier. The only recognized filter
   * right now is "string", which quotes the variable value as a JavaScript
   * string. Unrecognized filters result in throwing |Error|.
   *
   * If any part has multiple lines and the first line is indented by some
   * amount of whitespace (as defined by the /\s+/ JavaScript regular
   * expression), second to last lines are indented by the same amount of
   * whitespace. This results in nicely indented multiline code in variables
   * without making the templates look ugly.
   *
   * Examples:
   *
   * formatCode("foo", "bar"); // "foo\nbar"
   * formatCode("foo", "${bar}", { bar: "baz" }); // "foo\nbaz"
   * formatCode("foo", "${bar}"); // throws Error
   * formatCode("foo", "${bar|string}", { bar: "baz" }); // "foo\n\"baz\""
   * formatCode("foo", "${bar|eeek}", { bar: "baz" }); // throws Error
   * formatCode("foo", "${bar}", { bar: " baz\nqux" }); // "foo\n baz\n qux"
   */
  function formatCode() {
    /* Replaces every "${name}" / "${name|filter}" occurrence in each part. */
    function interpolateVariablesInParts(parts) {
      return map(parts, function(part) {
        return part.replace(
          /\$\{([a-zA-Z_][a-zA-Z0-9_]*)(\|([a-zA-Z_][a-zA-Z0-9_]*))?\}/g,
          function(match, name, dummy, filter) {
            var value = vars[name];
            if (value === undefined) {
              throw new Error("Undefined variable: \"" + name + "\".");
            }

            if (filter !== undefined && filter !== "") { // JavaScript engines differ here.
              if (filter === "string") {
                return quote(value);
              } else {
                throw new Error("Unrecognized filter: \"" + filter + "\".");
              }
            } else {
              return value;
            }
          }
        );
      });
    }

    /*
     * Prefixes the second to last lines of every multiline part with the
     * leading whitespace of the part's first line.
     */
    function indentMultilineParts(parts) {
      return map(parts, function(part) {
        if (!/\n/.test(part)) { return part; }

        var firstLineWhitespacePrefix = part.match(/^\s*/)[0];
        var lines = part.split("\n");
        var linesIndented = [lines[0]].concat(
          map(lines.slice(1), function(line) {
            return firstLineWhitespacePrefix + line;
          })
        );
        return linesIndented.join("\n");
      });
    }

    var args = Array.prototype.slice.call(arguments);
    /* A trailing object argument carries the variables; strings never match. */
    var vars = args[args.length - 1] instanceof Object ? args.pop() : {};

    return indentMultilineParts(interpolateVariablesInParts(args)).join("\n");
  }

  /* Returns the name of the result variable with the given index. */
  function resultVar(index) { return "result" + index; }

  /* Returns the name of the position variable with the given index. */
  function posVar(index) { return "pos" + index; }

  var emit = buildNodeVisitor({
    /* Emits the whole parser: the wrapper object, helpers and all rules. */
    grammar: function(node) {
      var initializerCode = node.initializer !== null
        ? emit(node.initializer)
        : "";

      var name;

      var parseFunctionTableItems = [];
      for (name in node.rules) {
        parseFunctionTableItems.push(quote(name) + ": parse_" + name);
      }
      parseFunctionTableItems.sort();

      var parseFunctionDefinitions = [];
      for (name in node.rules) {
        parseFunctionDefinitions.push(emit(node.rules[name]));
      }

      return formatCode(
        '(function(){',
        ' /* Generated by PEG.js @VERSION (http://pegjs.majda.cz/). */',
        ' ',
        ' var result = {',
        ' /*',
        ' * Parses the input with a generated parser. If the parsing is successfull,',
        ' * returns a value explicitly or implicitly specified by the grammar from',
        ' * which the parser was generated (see |PEG.buildParser|). If the parsing is',
        ' * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.',
        ' */',
        ' parse: function(input, startRule) {',
        ' var parseFunctions = {',
        ' ${parseFunctionTableItems}',
        ' };',
        ' ',
        ' if (startRule !== undefined) {',
        /*
         * Only own properties of the table are rules. A plain
         * |parseFunctions[startRule] === undefined| test would accept names
         * inherited from |Object.prototype| (e.g. "toString") and then call
         * them as if they were parse functions.
         */
        ' if (!Object.prototype.hasOwnProperty.call(parseFunctions, startRule)) {',
        ' throw new Error("Invalid rule name: " + quote(startRule) + ".");',
        ' }',
        ' } else {',
        ' startRule = ${startRule|string};',
        ' }',
        ' ',
        ' var pos = 0;',
        ' var reportFailures = 0;', // 0 = report, anything > 0 = do not report
        ' var rightmostFailuresPos = 0;',
        ' var rightmostFailuresExpected = [];',
        ' var cache = {};',
        ' ',
        /* This needs to be in sync with |padLeft| in utils.js. */
        ' function padLeft(input, padding, length) {',
        ' var result = input;',
        ' ',
        ' var padLength = length - input.length;',
        ' for (var i = 0; i < padLength; i++) {',
        ' result = padding + result;',
        ' }',
        ' ',
        ' return result;',
        ' }',
        ' ',
        /* This needs to be in sync with |escape| in utils.js. */
        ' function escape(ch) {',
        ' var charCode = ch.charCodeAt(0);',
        ' var escapeChar;',
        ' var length;',
        ' ',
        ' if (charCode <= 0xFF) {',
        ' escapeChar = \'x\';',
        ' length = 2;',
        ' } else {',
        ' escapeChar = \'u\';',
        ' length = 4;',
        ' }',
        ' ',
        ' return \'\\\\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), \'0\', length);',
        ' }',
        ' ',
        /* This needs to be in sync with |quote| in utils.js. */
        ' function quote(s) {',
        ' /*',
        ' * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a',
        ' * string literal except for the closing quote character, backslash,',
        ' * carriage return, line separator, paragraph separator, and line feed.',
        ' * Any character may appear in the form of an escape sequence.',
        ' *',
        ' * For portability, we also escape escape all control and non-ASCII',
        ' * characters. Note that "\\0" and "\\v" escape sequences are not used',
        ' * because JSHint does not like the first and IE the second.',
        ' */',
        ' return \'"\' + s',
        ' .replace(/\\\\/g, \'\\\\\\\\\') // backslash',
        ' .replace(/"/g, \'\\\\"\') // closing quote character',
        ' .replace(/\\x08/g, \'\\\\b\') // backspace',
        ' .replace(/\\t/g, \'\\\\t\') // horizontal tab',
        ' .replace(/\\n/g, \'\\\\n\') // line feed',
        ' .replace(/\\f/g, \'\\\\f\') // form feed',
        ' .replace(/\\r/g, \'\\\\r\') // carriage return',
        ' .replace(/[\\x00-\\x07\\x0B\\x0E-\\x1F\\x80-\\uFFFF]/g, escape)',
        ' + \'"\';',
        ' }',
        ' ',
        ' function matchFailed(failure) {',
        ' if (pos < rightmostFailuresPos) {',
        ' return;',
        ' }',
        ' ',
        ' if (pos > rightmostFailuresPos) {',
        ' rightmostFailuresPos = pos;',
        ' rightmostFailuresExpected = [];',
        ' }',
        ' ',
        ' rightmostFailuresExpected.push(failure);',
        ' }',
        ' ',
        ' ${parseFunctionDefinitions}',
        ' ',
        ' function buildErrorMessage() {',
        ' function buildExpected(failuresExpected) {',
        ' failuresExpected.sort();',
        ' ',
        ' var lastFailure = null;',
        ' var failuresExpectedUnique = [];',
        ' for (var i = 0; i < failuresExpected.length; i++) {',
        ' if (failuresExpected[i] !== lastFailure) {',
        ' failuresExpectedUnique.push(failuresExpected[i]);',
        ' lastFailure = failuresExpected[i];',
        ' }',
        ' }',
        ' ',
        ' switch (failuresExpectedUnique.length) {',
        ' case 0:',
        ' return "end of input";',
        ' case 1:',
        ' return failuresExpectedUnique[0];',
        ' default:',
        ' return failuresExpectedUnique.slice(0, failuresExpectedUnique.length - 1).join(", ")',
        ' + " or "',
        ' + failuresExpectedUnique[failuresExpectedUnique.length - 1];',
        ' }',
        ' }',
        ' ',
        ' var expected = buildExpected(rightmostFailuresExpected);',
        ' var actualPos = Math.max(pos, rightmostFailuresPos);',
        ' var actual = actualPos < input.length',
        ' ? quote(input.charAt(actualPos))',
        ' : "end of input";',
        ' ',
        ' return "Expected " + expected + " but " + actual + " found.";',
        ' }',
        ' ',
        ' function computeErrorPosition() {',
        ' /*',
        ' * The first idea was to use |String.split| to break the input up to the',
        ' * error position along newlines and derive the line and column from',
        ' * there. However IE\'s |split| implementation is so broken that it was',
        ' * enough to prevent it.',
        ' */',
        ' ',
        ' var line = 1;',
        ' var column = 1;',
        ' var seenCR = false;',
        /*
         * The scanned offset must be the same one |buildErrorMessage| reports
         * the offending character at. When the start rule matches only a
         * prefix of the input without recording any failure, |pos| is greater
         * than |rightmostFailuresPos| and using the latter alone would report
         * a position before the character named in the message.
         */
        ' var errorPos = Math.max(pos, rightmostFailuresPos);',
        ' ',
        ' for (var i = 0; i < errorPos; i++) {',
        ' var ch = input.charAt(i);',
        ' if (ch === "\\n") {',
        ' if (!seenCR) { line++; }',
        ' column = 1;',
        ' seenCR = false;',
        ' } else if (ch === "\\r" || ch === "\\u2028" || ch === "\\u2029") {',
        ' line++;',
        ' column = 1;',
        ' seenCR = true;',
        ' } else {',
        ' column++;',
        ' seenCR = false;',
        ' }',
        ' }',
        ' ',
        ' return { line: line, column: column };',
        ' }',
        ' ',
        ' ${initializerCode}',
        ' ',
        ' var result = parseFunctions[startRule]();',
        ' ',
        ' /*',
        ' * The parser is now in one of the following three states:',
        ' *',
        ' * 1. The parser successfully parsed the whole input.',
        ' *',
        ' * - |result !== null|',
        ' * - |pos === input.length|',
        ' * - |rightmostFailuresExpected| may or may not contain something',
        ' *',
        ' * 2. The parser successfully parsed only a part of the input.',
        ' *',
        ' * - |result !== null|',
        ' * - |pos < input.length|',
        ' * - |rightmostFailuresExpected| may or may not contain something',
        ' *',
        ' * 3. The parser did not successfully parse any part of the input.',
        ' *',
        ' * - |result === null|',
        ' * - |pos === 0|',
        ' * - |rightmostFailuresExpected| contains at least one failure',
        ' *',
        ' * All code following this comment (including called functions) must',
        ' * handle these states.',
        ' */',
        ' if (result === null || pos !== input.length) {',
        ' var errorPosition = computeErrorPosition();',
        ' throw new this.SyntaxError(',
        ' buildErrorMessage(),',
        ' errorPosition.line,',
        ' errorPosition.column',
        ' );',
        ' }',
        ' ',
        ' return result;',
        ' },',
        ' ',
        ' /* Returns the parser source code. */',
        ' toSource: function() { return this._source; }',
        ' };',
        ' ',
        ' /* Thrown when a parser encounters a syntax error. */',
        ' ',
        ' result.SyntaxError = function(message, line, column) {',
        ' this.name = "SyntaxError";',
        ' this.message = message;',
        ' this.line = line;',
        ' this.column = column;',
        ' };',
        ' ',
        ' result.SyntaxError.prototype = Error.prototype;',
        ' ',
        ' return result;',
        '})()',
        {
          initializerCode: initializerCode,
          parseFunctionTableItems: parseFunctionTableItems.join(',\n'),
          parseFunctionDefinitions: parseFunctionDefinitions.join('\n\n'),
          startRule: node.startRule
        }
      );
    },

    /* The initializer is emitted verbatim. */
    initializer: function(node) {
      return node.code;
    },

    /*
     * Emits a |parse_<name>| function that consults and fills the result
     * cache, declares the needed result/position variables and wraps the code
     * of the rule's expression.
     */
    rule: function(node) {
      var context = {
        resultIndex: 0,
        posIndex: 0
      };

      var resultVars = map(range(node.resultStackDepth), resultVar);
      var posVars = map(range(node.posStackDepth), posVar);

      var resultVarsCode = resultVars.length > 0
        ? 'var ' + resultVars.join(', ') + ';'
        : '';
      var posVarsCode = posVars.length > 0
        ? 'var ' + posVars.join(', ') + ';'
        : '';

      var setReportFailuresCode;
      var restoreReportFailuresCode;
      var reportFailureCode;

      if (node.displayName !== null) {
        /*
         * A rule with a display name silences failures reported from inside
         * it and reports a single failure with the display name instead.
         */
        setReportFailuresCode = formatCode(
          'reportFailures++;'
        );
        restoreReportFailuresCode = formatCode(
          'reportFailures--;'
        );
        reportFailureCode = formatCode(
          'if (reportFailures === 0 && ${resultVar} === null) {',
          ' matchFailed(${displayName|string});',
          '}',
          {
            displayName: node.displayName,
            resultVar: resultVar(context.resultIndex)
          }
        );
      } else {
        setReportFailuresCode = "";
        restoreReportFailuresCode = "";
        reportFailureCode = "";
      }

      return formatCode(
        'function parse_${name}() {',
        ' var cacheKey = "${name}@" + pos;',
        ' var cachedResult = cache[cacheKey];',
        ' if (cachedResult) {',
        ' pos = cachedResult.nextPos;',
        ' return cachedResult.result;',
        ' }',
        ' ',
        ' ${resultVarsCode}',
        ' ${posVarsCode}',
        ' ',
        ' ${setReportFailuresCode}',
        ' ${code}',
        ' ${restoreReportFailuresCode}',
        ' ${reportFailureCode}',
        ' ',
        ' cache[cacheKey] = {',
        ' nextPos: pos,',
        ' result: ${resultVar}',
        ' };',
        ' return ${resultVar};',
        '}',
        {
          name: node.name,
          resultVarsCode: resultVarsCode,
          posVarsCode: posVarsCode,
          setReportFailuresCode: setReportFailuresCode,
          restoreReportFailuresCode: restoreReportFailuresCode,
          reportFailureCode: reportFailureCode,
          code: emit(node.expression, context),
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /*
     * The contract for all code fragments generated by the following functions
     * is as follows.
     *
     * The code fragment tries to match a part of the input starting with the
     * position indicated in |pos|. That position may point past the end of the
     * input.
     *
     * * If the code fragment matches the input, it advances |pos| to point to
     *   the first character following the matched part of the input and sets
     *   variable with a name computed by calling
     *   |resultVar(context.resultIndex)| to an appropriate value. This value is
     *   always non-|null|.
     *
     * * If the code fragment does not match the input, it returns with |pos|
     *   set to the original value and it sets a variable with a name computed
     *   by calling |resultVar(context.resultIndex)| to |null|.
     *
     * The code can use variables with names computed by calling
     *
     *   |resultVar(context.resultIndex + i)|
     *
     * and
     *
     *   |posVar(context.posIndex + i)|
     *
     * where |i| >= 1 to store necessary data (return values and positions). It
     * won't use any other variables.
     */

    /*
     * Built from the last alternative backwards, so that each alternative's
     * code is followed by a |null| check guarding the remaining ones.
     */
    choice: function(node, context) {
      var code, nextAlternativesCode;

      for (var i = node.alternatives.length - 1; i >= 0; i--) {
        nextAlternativesCode = i !== node.alternatives.length - 1
          ? formatCode(
              'if (${resultVar} === null) {',
              ' ${code}',
              '}',
              {
                code: code,
                resultVar: resultVar(context.resultIndex)
              }
            )
          : '';
        code = formatCode(
          '${currentAlternativeCode}',
          '${nextAlternativesCode}',
          {
            currentAlternativeCode: emit(node.alternatives[i], context),
            nextAlternativesCode: nextAlternativesCode
          }
        );
      }

      return code;
    },

    /*
     * Each element stores its result in its own variable; the innermost code
     * collects them into an array. Any failing element resets |pos| to the
     * position saved before the sequence.
     */
    sequence: function(node, context) {
      var elementResultVars = map(node.elements, function(element, i) {
        return resultVar(context.resultIndex + i);
      });

      var code = formatCode(
        '${resultVar} = ${elementResultVarArray};',
        {
          resultVar: resultVar(context.resultIndex),
          elementResultVarArray: '[' + elementResultVars.join(', ') + ']'
        }
      );
      var elementContext;

      for (var i = node.elements.length - 1; i >= 0; i--) {
        elementContext = {
          resultIndex: context.resultIndex + i,
          posIndex: context.posIndex + 1
        };
        code = formatCode(
          '${elementCode}',
          'if (${elementResultVar} !== null) {',
          ' ${code}',
          '} else {',
          ' ${resultVar} = null;',
          ' pos = ${posVar};',
          '}',
          {
            elementCode: emit(node.elements[i], elementContext),
            elementResultVar: elementResultVars[i],
            code: code,
            posVar: posVar(context.posIndex),
            resultVar: resultVar(context.resultIndex)
          }
        );
      }

      return formatCode(
        '${posVar} = pos;',
        '${code}',
        {
          code: code,
          posVar: posVar(context.posIndex)
        }
      );
    },

    /* Labels only matter to |action|; the expression is emitted as is. */
    labeled: function(node, context) {
      return emit(node.expression, context);
    },

    /* Positive lookahead: matches without consuming and without reporting. */
    simple_and: function(node, context) {
      var expressionContext = {
        resultIndex: context.resultIndex,
        posIndex: context.posIndex + 1
      };

      return formatCode(
        '${posVar} = pos;',
        'reportFailures++;',
        '${expressionCode}',
        'reportFailures--;',
        'if (${resultVar} !== null) {',
        ' ${resultVar} = "";',
        ' pos = ${posVar};',
        '} else {',
        ' ${resultVar} = null;',
        '}',
        {
          expressionCode: emit(node.expression, expressionContext),
          posVar: posVar(context.posIndex),
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /* Negative lookahead: succeeds (with "") only if the expression fails. */
    simple_not: function(node, context) {
      var expressionContext = {
        resultIndex: context.resultIndex,
        posIndex: context.posIndex + 1
      };

      return formatCode(
        '${posVar} = pos;',
        'reportFailures++;',
        '${expressionCode}',
        'reportFailures--;',
        'if (${resultVar} === null) {',
        ' ${resultVar} = "";',
        '} else {',
        ' ${resultVar} = null;',
        ' pos = ${posVar};',
        '}',
        {
          expressionCode: emit(node.expression, expressionContext),
          posVar: posVar(context.posIndex),
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /* Semantic predicate: matches iff the user code returns a truthy value. */
    semantic_and: function(node, context) {
      return formatCode(
        '${resultVar} = (function() {${actionCode}})() ? "" : null;',
        {
          actionCode: node.code,
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /* Semantic predicate: matches iff the user code returns a falsy value. */
    semantic_not: function(node, context) {
      return formatCode(
        '${resultVar} = (function() {${actionCode}})() ? null : "";',
        {
          actionCode: node.code,
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /* A failed optional expression yields "" instead of |null|. */
    optional: function(node, context) {
      return formatCode(
        '${expressionCode}',
        '${resultVar} = ${resultVar} !== null ? ${resultVar} : "";',
        {
          expressionCode: emit(node.expression, context),
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /* Collects matches into an array; always succeeds. */
    zero_or_more: function(node, context) {
      var expressionContext = {
        resultIndex: context.resultIndex + 1,
        posIndex: context.posIndex
      };

      return formatCode(
        '${resultVar} = [];',
        '${expressionCode}',
        'while (${expressionResultVar} !== null) {',
        ' ${resultVar}.push(${expressionResultVar});',
        ' ${expressionCode}',
        '}',
        {
          expressionCode: emit(node.expression, expressionContext),
          expressionResultVar: resultVar(context.resultIndex + 1),
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /* Collects matches into an array; fails if there is not even one. */
    one_or_more: function(node, context) {
      var expressionContext = {
        resultIndex: context.resultIndex + 1,
        posIndex: context.posIndex
      };

      return formatCode(
        '${expressionCode}',
        'if (${expressionResultVar} !== null) {',
        ' ${resultVar} = [];',
        ' while (${expressionResultVar} !== null) {',
        ' ${resultVar}.push(${expressionResultVar});',
        ' ${expressionCode}',
        ' }',
        '} else {',
        ' ${resultVar} = null;',
        '}',
        {
          expressionCode: emit(node.expression, expressionContext),
          expressionResultVar: resultVar(context.resultIndex + 1),
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    action: function(node, context) {
      /*
       * In case of sequences, we splat their elements into function arguments
       * one by one. Example:
       *
       *   start: a:"a" b:"b" c:"c" { alert(arguments.length) }  // => 3
       *
       * This behavior is reflected in this function.
       */

      var expressionContext = {
        resultIndex: context.resultIndex,
        posIndex: context.posIndex + 1
      };

      var formalParams;
      var actualParams;

      if (node.expression.type === "sequence") {
        formalParams = [];
        actualParams = [];

        var elements = node.expression.elements;
        var elementsLength = elements.length;
        for (var i = 0; i < elementsLength; i++) {
          /* Only labeled elements are passed to the action. */
          if (elements[i].type === "labeled") {
            formalParams.push(elements[i].label);
            actualParams.push(resultVar(context.resultIndex) + '[' + i + ']');
          }
        }
      } else if (node.expression.type === "labeled") {
        formalParams = [node.expression.label];
        actualParams = [resultVar(context.resultIndex)];
      } else {
        formalParams = [];
        actualParams = [];
      }

      /*
       * The action itself may return |null|, which turns the match into a
       * failure; hence the second check that restores |pos|.
       */
      return formatCode(
        '${posVar} = pos;',
        '${expressionCode}',
        'if (${resultVar} !== null) {',
        ' ${resultVar} = (function(${formalParams}) {${actionCode}})(${actualParams});',
        '}',
        'if (${resultVar} === null) {',
        ' pos = ${posVar};',
        '}',
        {
          expressionCode: emit(node.expression, expressionContext),
          actionCode: node.code,
          formalParams: formalParams.join(', '),
          actualParams: actualParams.join(', '),
          posVar: posVar(context.posIndex),
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /* A rule reference is a call of the referenced rule's parse function. */
    rule_ref: function(node, context) {
      return formatCode(
        '${resultVar} = ${ruleMethod}();',
        {
          ruleMethod: 'parse_' + node.name,
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    literal: function(node, context) {
      var length = node.value.length;

      /* An empty literal always matches and consumes nothing. */
      if (length === 0) {
        return formatCode(
          '${resultVar} = "";',
          { resultVar: resultVar(context.resultIndex) }
        );
      }

      /* Single characters are compared by char code, longer ones by substring. */
      var testCode = length === 1
        ? formatCode(
            'input.charCodeAt(pos) === ${valueCharCode}',
            { valueCharCode: node.value.charCodeAt(0) }
          )
        : formatCode(
            'input.substr(pos, ${length}) === ${value|string}',
            {
              value: node.value,
              length: length
            }
          );

      return formatCode(
        'if (${testCode}) {',
        ' ${resultVar} = ${value|string};',
        ' pos += ${length};',
        '} else {',
        ' ${resultVar} = null;',
        ' if (reportFailures === 0) {',
        ' matchFailed(${valueQuoted|string});',
        ' }',
        '}',
        {
          testCode: testCode,
          value: node.value,
          valueQuoted: quote(node.value),
          length: length,
          resultVar: resultVar(context.resultIndex)
        }
      );
    },

    /* Matches any single character; fails only at the end of the input. */
    any: function(node, context) {
      return formatCode(
        'if (input.length > pos) {',
        ' ${resultVar} = input.charAt(pos);',
        ' pos++;',
        '} else {',
        ' ${resultVar} = null;',
        ' if (reportFailures === 0) {',
        ' matchFailed("any character");',
        ' }',
        '}',
        { resultVar: resultVar(context.resultIndex) }
      );
    },

    /* Character classes are tested with a regexp built from the class parts. */
    "class": function(node, context) {
      var regexp;

      if (node.parts.length > 0) {
        regexp = '/^['
          + (node.inverted ? '^' : '')
          + map(node.parts, function(part) {
              return part instanceof Array
                ? quoteForRegexpClass(part[0])
                  + '-'
                  + quoteForRegexpClass(part[1])
                : quoteForRegexpClass(part);
            }).join('')
          + ']/';
      } else {
        /*
         * Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so
         * we translate them into equivalents it can handle.
         */
        regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
      }

      return formatCode(
        'if (${regexp}.test(input.charAt(pos))) {',
        ' ${resultVar} = input.charAt(pos);',
        ' pos++;',
        '} else {',
        ' ${resultVar} = null;',
        ' if (reportFailures === 0) {',
        ' matchFailed(${rawText|string});',
        ' }',
        '}',
        {
          regexp: regexp,
          rawText: node.rawText,
          resultVar: resultVar(context.resultIndex)
        }
      );
    }
  });

  return emit(ast);
};