/* Emits the generated code for the AST. */ PEG.compiler.emitter = function(ast) { /* * Takes parts of code, interpolates variables inside them and joins them with * a newline. * * Variables are delimited with "${" and "}" and their names must be valid * identifiers (i.e. they must match [a-zA-Z_][a-zA-Z0-9_]*). Variable values * are specified as properties of the last parameter (if this is an object, * otherwise empty variable set is assumed). Undefined variables result in * throwing |Error|. * * There can be a filter specified after the variable name, prefixed with "|". * The filter name must be a valid identifier. The only recognized filter * right now is "string", which quotes the variable value as a JavaScript * string. Unrecognized filters result in throwing |Error|. * * If any part has multiple lines and the first line is indented by some * amount of whitespace (as defined by the /\s+/ JavaScript regular * expression), second to last lines are indented by the same amount of * whitespace. This results in nicely indented multiline code in variables * without making the templates look ugly. * * Examples: * * formatCode("foo", "bar"); // "foo\nbar" * formatCode("foo", "${bar}", { bar: "baz" }); // "foo\nbaz" * formatCode("foo", "${bar}"); // throws Error * formatCode("foo", "${bar|string}", { bar: "baz" }); // "foo\n\"baz\"" * formatCode("foo", "${bar|eeek}", { bar: "baz" }); // throws Error * formatCode("foo", "${bar}", { bar: " baz\nqux" }); // "foo\n baz\n qux" */ function formatCode() { function interpolateVariablesInParts(parts) { return map(parts, function(part) { return part.replace( /\$\{([a-zA-Z_][a-zA-Z0-9_]*)(\|([a-zA-Z_][a-zA-Z0-9_]*))?\}/g, function(match, name, dummy, filter) { var value = vars[name]; if (value === undefined) { throw new Error("Undefined variable: \"" + name + "\"."); } if (filter !== undefined && filter != "") { // JavaScript engines differ here. if (filter === "string") { return quote(value); } else { throw new Error("Unrecognized filter: \"" + filter + "\"."); } } else { return value; } } ); }); } function indentMultilineParts(parts) { return map(parts, function(part) { if (!/\n/.test(part)) { return part; } var firstLineWhitespacePrefix = part.match(/^\s*/)[0]; var lines = part.split("\n"); var linesIndented = [lines[0]].concat( map(lines.slice(1), function(line) { return firstLineWhitespacePrefix + line; }) ); return linesIndented.join("\n"); }); } var args = Array.prototype.slice.call(arguments); var vars = args[args.length - 1] instanceof Object ? args.pop() : {}; return indentMultilineParts(interpolateVariablesInParts(args)).join("\n"); }; function resultVar(index) { return "result" + index; } function posVar(index) { return "pos" + index; } var emit = buildNodeVisitor({ grammar: function(node) { var initializerCode = node.initializer !== null ? emit(node.initializer) : ""; var parseFunctionTableItems = []; for (var name in node.rules) { parseFunctionTableItems.push(quote(name) + ": parse_" + name); } parseFunctionTableItems.sort(); var parseFunctionDefinitions = []; for (var name in node.rules) { parseFunctionDefinitions.push(emit(node.rules[name])); } return formatCode( "(function(){", " /* Generated by PEG.js @VERSION (http://pegjs.majda.cz/). */", " ", " var result = {", " /*", " * Parses the input with a generated parser. If the parsing is successfull,", " * returns a value explicitly or implicitly specified by the grammar from", " * which the parser was generated (see |PEG.buildParser|). If the parsing is", " * unsuccessful, throws |PEG.parser.SyntaxError| describing the error.", " */", " parse: function(input, startRule) {", " var parseFunctions = {", " ${parseFunctionTableItems}", " };", " ", " if (startRule !== undefined) {", " if (parseFunctions[startRule] === undefined) {", " throw new Error(\"Invalid rule name: \" + quote(startRule) + \".\");", " }", " } else {", " startRule = ${startRule|string};", " }", " ", " var pos = 0;", " var reportFailures = 0;", // 0 = report, anything > 0 = do not report " var rightmostFailuresPos = 0;", " var rightmostFailuresExpected = [];", " var cache = {};", " ", /* This needs to be in sync with |padLeft| in utils.js. */ " function padLeft(input, padding, length) {", " var result = input;", " ", " var padLength = length - input.length;", " for (var i = 0; i < padLength; i++) {", " result = padding + result;", " }", " ", " return result;", " }", " ", /* This needs to be in sync with |escape| in utils.js. */ " function escape(ch) {", " var charCode = ch.charCodeAt(0);", " var escapeChar;", " var length;", " ", " if (charCode <= 0xFF) {", " escapeChar = 'x';", " length = 2;", " } else {", " escapeChar = 'u';", " length = 4;", " }", " ", " return '\\\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);", " }", " ", /* This needs to be in sync with |quote| in utils.js. */ " function quote(s) {", " /*", " * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a", " * string literal except for the closing quote character, backslash,", " * carriage return, line separator, paragraph separator, and line feed.", " * Any character may appear in the form of an escape sequence.", " *", " * For portability, we also escape escape all control and non-ASCII", " * characters. Note that \"\\0\" and \"\\v\" escape sequences are not used", " * because JSHint does not like the first and IE the second.", " */", " return '\"' + s", " .replace(/\\\\/g, '\\\\\\\\') // backslash", " .replace(/\"/g, '\\\\\"') // closing quote character", " .replace(/\\x08/g, '\\\\b') // backspace", " .replace(/\\t/g, '\\\\t') // horizontal tab", " .replace(/\\n/g, '\\\\n') // line feed", " .replace(/\\f/g, '\\\\f') // form feed", " .replace(/\\r/g, '\\\\r') // carriage return", " .replace(/[\\x00-\\x07\\x0B\\x0E-\\x1F\\x80-\\uFFFF]/g, escape)", " + '\"';", " }", " ", " function matchFailed(failure) {", " if (pos < rightmostFailuresPos) {", " return;", " }", " ", " if (pos > rightmostFailuresPos) {", " rightmostFailuresPos = pos;", " rightmostFailuresExpected = [];", " }", " ", " rightmostFailuresExpected.push(failure);", " }", " ", " ${parseFunctionDefinitions}", " ", " function buildErrorMessage() {", " function buildExpected(failuresExpected) {", " failuresExpected.sort();", " ", " var lastFailure = null;", " var failuresExpectedUnique = [];", " for (var i = 0; i < failuresExpected.length; i++) {", " if (failuresExpected[i] !== lastFailure) {", " failuresExpectedUnique.push(failuresExpected[i]);", " lastFailure = failuresExpected[i];", " }", " }", " ", " switch (failuresExpectedUnique.length) {", " case 0:", " return 'end of input';", " case 1:", " return failuresExpectedUnique[0];", " default:", " return failuresExpectedUnique.slice(0, failuresExpectedUnique.length - 1).join(', ')", " + ' or '", " + failuresExpectedUnique[failuresExpectedUnique.length - 1];", " }", " }", " ", " var expected = buildExpected(rightmostFailuresExpected);", " var actualPos = Math.max(pos, rightmostFailuresPos);", " var actual = actualPos < input.length", " ? quote(input.charAt(actualPos))", " : 'end of input';", " ", " return 'Expected ' + expected + ' but ' + actual + ' found.';", " }", " ", " function computeErrorPosition() {", " /*", " * The first idea was to use |String.split| to break the input up to the", " * error position along newlines and derive the line and column from", " * there. However IE's |split| implementation is so broken that it was", " * enough to prevent it.", " */", " ", " var line = 1;", " var column = 1;", " var seenCR = false;", " ", " for (var i = 0; i < rightmostFailuresPos; i++) {", " var ch = input.charAt(i);", " if (ch === '\\n') {", " if (!seenCR) { line++; }", " column = 1;", " seenCR = false;", " } else if (ch === '\\r' | ch === '\\u2028' || ch === '\\u2029') {", " line++;", " column = 1;", " seenCR = true;", " } else {", " column++;", " seenCR = false;", " }", " }", " ", " return { line: line, column: column };", " }", " ", " ${initializerCode}", " ", " var result = parseFunctions[startRule]();", " ", " /*", " * The parser is now in one of the following three states:", " *", " * 1. The parser successfully parsed the whole input.", " *", " * - |result !== null|", " * - |pos === input.length|", " * - |rightmostFailuresExpected| may or may not contain something", " *", " * 2. The parser successfully parsed only a part of the input.", " *", " * - |result !== null|", " * - |pos < input.length|", " * - |rightmostFailuresExpected| may or may not contain something", " *", " * 3. The parser did not successfully parse any part of the input.", " *", " * - |result === null|", " * - |pos === 0|", " * - |rightmostFailuresExpected| contains at least one failure", " *", " * All code following this comment (including called functions) must", " * handle these states.", " */", " if (result === null || pos !== input.length) {", " var errorPosition = computeErrorPosition();", " throw new this.SyntaxError(", " buildErrorMessage(),", " errorPosition.line,", " errorPosition.column", " );", " }", " ", " return result;", " },", " ", " /* Returns the parser source code. */", " toSource: function() { return this._source; }", " };", " ", " /* Thrown when a parser encounters a syntax error. */", " ", " result.SyntaxError = function(message, line, column) {", " this.name = 'SyntaxError';", " this.message = message;", " this.line = line;", " this.column = column;", " };", " ", " result.SyntaxError.prototype = Error.prototype;", " ", " return result;", "})()", { initializerCode: initializerCode, parseFunctionTableItems: parseFunctionTableItems.join(",\n"), parseFunctionDefinitions: parseFunctionDefinitions.join("\n\n"), startRule: node.startRule } ); }, initializer: function(node) { return node.code; }, rule: function(node) { var context = { resultIndex: 0, posIndex: 0 }; var resultVars = map(range(node.resultStackDepth), resultVar); var posVars = map(range(node.posStackDepth), posVar); var resultVarsCode = resultVars.length > 0 ? "var " + resultVars.join(", ") + ";" : ""; var posVarsCode = posVars.length > 0 ? "var " + posVars.join(", ") + ";" : ""; if (node.displayName !== null) { var setReportFailuresCode = formatCode( "reportFailures++;" ); var restoreReportFailuresCode = formatCode( "reportFailures--;" ); var reportFailureCode = formatCode( "if (reportFailures === 0 && ${resultVar} === null) {", " matchFailed(${displayName|string});", "}", { displayName: node.displayName, resultVar: resultVar(context.resultIndex) } ); } else { var setReportFailuresCode = ""; var restoreReportFailuresCode = ""; var reportFailureCode = ""; } return formatCode( "function parse_${name}() {", " var cacheKey = '${name}@' + pos;", " var cachedResult = cache[cacheKey];", " if (cachedResult) {", " pos = cachedResult.nextPos;", " return cachedResult.result;", " }", " ", " ${resultVarsCode}", " ${posVarsCode}", " ", " ${setReportFailuresCode}", " ${code}", " ${restoreReportFailuresCode}", " ${reportFailureCode}", " ", " cache[cacheKey] = {", " nextPos: pos,", " result: ${resultVar}", " };", " return ${resultVar};", "}", { name: node.name, resultVarsCode: resultVarsCode, posVarsCode: posVarsCode, setReportFailuresCode: setReportFailuresCode, restoreReportFailuresCode: restoreReportFailuresCode, reportFailureCode: reportFailureCode, code: emit(node.expression, context), resultVar: resultVar(context.resultIndex) } ); }, /* * The contract for all code fragments generated by the following functions * is as follows. * * The code fragment tries to match a part of the input starting with the * position indicated in |pos|. That position may point past the end of the * input. * * * If the code fragment matches the input, it advances |pos| to point to * the first chracter following the matched part of the input and sets * variable with a name computed by calling * |resultVar(context.resultIndex)| to an appropriate value. This value is * always non-|null|. * * * If the code fragment does not match the input, it returns with |pos| * set to the original value and it sets a variable with a name computed * by calling |resultVar(context.resultIndex)| to |null|. * * The code can use variables with names computed by calling * * |resultVar(context.resultIndex + i)| * * and * * |posVar(context.posIndex + i)| * * where |i| >= 1 to store necessary data (return values and positions). It * won't use any other variables. */ choice: function(node, context) { var code, nextAlternativesCode; for (var i = node.alternatives.length - 1; i >= 0; i--) { nextAlternativesCode = i !== node.alternatives.length - 1 ? formatCode( "if (${resultVar} === null) {", " ${code}", "}", { code: code, resultVar: resultVar(context.resultIndex) } ) : ""; code = formatCode( "${currentAlternativeCode}", "${nextAlternativesCode}", { currentAlternativeCode: emit(node.alternatives[i], context), nextAlternativesCode: nextAlternativesCode } ); } return code; }, sequence: function(node, context) { var elementResultVars = map(node.elements, function(element, i) { return resultVar(context.resultIndex + i); }); var code = formatCode( "${resultVar} = ${elementResultVarArray};", { resultVar: resultVar(context.resultIndex), elementResultVarArray: "[" + elementResultVars.join(", ") + "]" } ); var elementContext; for (var i = node.elements.length - 1; i >= 0; i--) { elementContext = { resultIndex: context.resultIndex + i, posIndex: context.posIndex + 1 }; code = formatCode( "${elementCode}", "if (${elementResultVar} !== null) {", " ${code}", "} else {", " ${resultVar} = null;", " pos = ${posVar};", "}", { elementCode: emit(node.elements[i], elementContext), elementResultVar: elementResultVars[i], code: code, posVar: posVar(context.posIndex), resultVar: resultVar(context.resultIndex) } ); } return formatCode( "${posVar} = pos;", "${code}", { code: code, posVar: posVar(context.posIndex) } ); }, labeled: function(node, context) { return emit(node.expression, context); }, simple_and: function(node, context) { var expressionContext = { resultIndex: context.resultIndex, posIndex: context.posIndex + 1 }; return formatCode( "${posVar} = pos;", "reportFailures++;", "${expressionCode}", "reportFailures--;", "if (${resultVar} !== null) {", " ${resultVar} = '';", " pos = ${posVar};", "} else {", " ${resultVar} = null;", "}", { expressionCode: emit(node.expression, expressionContext), posVar: posVar(context.posIndex), resultVar: resultVar(context.resultIndex) } ); }, simple_not: function(node, context) { var expressionContext = { resultIndex: context.resultIndex, posIndex: context.posIndex + 1 }; return formatCode( "${posVar} = pos;", "reportFailures++;", "${expressionCode}", "reportFailures--;", "if (${resultVar} === null) {", " ${resultVar} = '';", "} else {", " ${resultVar} = null;", " pos = ${posVar};", "}", { expressionCode: emit(node.expression, expressionContext), posVar: posVar(context.posIndex), resultVar: resultVar(context.resultIndex) } ); }, semantic_and: function(node, context) { return formatCode( "${resultVar} = (function() {${actionCode}})() ? '' : null;", { actionCode: node.code, resultVar: resultVar(context.resultIndex) } ); }, semantic_not: function(node, context) { return formatCode( "${resultVar} = (function() {${actionCode}})() ? null : '';", { actionCode: node.code, resultVar: resultVar(context.resultIndex) } ); }, optional: function(node, context) { return formatCode( "${expressionCode}", "${resultVar} = ${resultVar} !== null ? ${resultVar} : '';", { expressionCode: emit(node.expression, context), resultVar: resultVar(context.resultIndex) } ); }, zero_or_more: function(node, context) { var expressionContext = { resultIndex: context.resultIndex + 1, posIndex: context.posIndex }; return formatCode( "${resultVar} = [];", "${expressionCode}", "while (${expressionResultVar} !== null) {", " ${resultVar}.push(${expressionResultVar});", " ${expressionCode}", "}", { expressionCode: emit(node.expression, expressionContext), expressionResultVar: resultVar(context.resultIndex + 1), resultVar: resultVar(context.resultIndex) } ); }, one_or_more: function(node, context) { var expressionContext = { resultIndex: context.resultIndex + 1, posIndex: context.posIndex }; return formatCode( "${expressionCode}", "if (${expressionResultVar} !== null) {", " ${resultVar} = [];", " while (${expressionResultVar} !== null) {", " ${resultVar}.push(${expressionResultVar});", " ${expressionCode}", " }", "} else {", " ${resultVar} = null;", "}", { expressionCode: emit(node.expression, expressionContext), expressionResultVar: resultVar(context.resultIndex + 1), resultVar: resultVar(context.resultIndex) } ); }, action: function(node, context) { /* * In case of sequences, we splat their elements into function arguments * one by one. Example: * * start: a:"a" b:"b" c:"c" { alert(arguments.length) } // => 3 * * This behavior is reflected in this function. */ var expressionContext = { resultIndex: context.resultIndex, posIndex: context.posIndex + 1 }; if (node.expression.type === "sequence") { var formalParams = []; var actualParams = []; var elements = node.expression.elements; var elementsLength = elements.length; for (var i = 0; i < elementsLength; i++) { if (elements[i].type === "labeled") { formalParams.push(elements[i].label); actualParams.push(resultVar(context.resultIndex) + "[" + i + "]"); } } } else if (node.expression.type === "labeled") { var formalParams = [node.expression.label]; var actualParams = [resultVar(context.resultIndex)]; } else { var formalParams = []; var actualParams = []; } return formatCode( "${posVar} = pos;", "${expressionCode}", "if (${resultVar} !== null) {", " ${resultVar} = (function(${formalParams}) {${actionCode}})(${actualParams});", "}", "if (${resultVar} === null) {", " pos = ${posVar};", "}", { expressionCode: emit(node.expression, expressionContext), actionCode: node.code, formalParams: formalParams.join(", "), actualParams: actualParams.join(", "), posVar: posVar(context.posIndex), resultVar: resultVar(context.resultIndex) } ); }, rule_ref: function(node, context) { return formatCode( "${resultVar} = ${ruleMethod}();", { ruleMethod: "parse_" + node.name, resultVar: resultVar(context.resultIndex) } ); }, literal: function(node, context) { var length = node.value.length; var testCode = length === 1 ? formatCode( "input.charCodeAt(pos) === ${valueCharCode}", { valueCharCode: node.value.charCodeAt(0) } ) : formatCode( "input.substr(pos, ${length}) === ${value|string}", { value: node.value, length: length } ); return formatCode( "if (${testCode}) {", " ${resultVar} = ${value|string};", " pos += ${length};", "} else {", " ${resultVar} = null;", " if (reportFailures === 0) {", " matchFailed(${valueQuoted|string});", " }", "}", { testCode: testCode, value: node.value, valueQuoted: quote(node.value), length: length, resultVar: resultVar(context.resultIndex) } ); }, any: function(node, context) { return formatCode( "if (input.length > pos) {", " ${resultVar} = input.charAt(pos);", " pos++;", "} else {", " ${resultVar} = null;", " if (reportFailures === 0) {", " matchFailed('any character');", " }", "}", { resultVar: resultVar(context.resultIndex) } ); }, "class": function(node, context) { if (node.parts.length > 0) { var regexp = "/^[" + (node.inverted ? "^" : "") + map(node.parts, function(part) { return part instanceof Array ? quoteForRegexpClass(part[0]) + "-" + quoteForRegexpClass(part[1]) : quoteForRegexpClass(part); }).join("") + "]/"; } else { /* * Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so * we translate them into euqivalents it can handle. */ var regexp = node.inverted ? "/^[\\S\\s]/" : "/^(?!)/"; } return formatCode( "if (${regexp}.test(input.charAt(pos))) {", " ${resultVar} = input.charAt(pos);", " pos++;", "} else {", " ${resultVar} = null;", " if (reportFailures === 0) {", " matchFailed(${rawText|string});", " }", "}", { regexp: regexp, rawText: node.rawText, resultVar: resultVar(context.resultIndex) } ); } }); return emit(ast); };