diff --git a/Makefile b/Makefile index 164f580..32a18a0 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,21 @@ PEGJS_VERSION = `cat $(VERSION_FILE)` +# ===== Modules ===== + +# Order matters -- dependencies must be listed before modules dependent on them. +MODULES = utils \ + grammar-error \ + parser \ + compiler/passes/allocate-registers \ + compiler/passes/generate-code \ + compiler/passes/remove-proxy-rules \ + compiler/passes/report-left-recursion \ + compiler/passes/report-missing-rules \ + compiler/passes \ + compiler \ + peg + # ===== Directories ===== SRC_DIR = src @@ -39,52 +54,64 @@ JASMINE_NODE = jasmine-node PEGJS = $(BIN_DIR)/pegjs BENCHMARK_RUN = $(BENCHMARK_DIR)/run -# ===== Preprocessor ===== - -# A simple preprocessor that recognizes two directives: -# -# @VERSION -- insert PEG.js version -# @include "" -- include here -# -# This could have been implemented many ways. I chose Perl because everyone will -# have it. -PREPROCESS=perl -e ' \ - use strict; \ - use warnings; \ - \ - use File::Basename; \ - \ - sub preprocess { \ - my $$file = shift; \ - my $$output = ""; \ - \ - open(my $$f, $$file) or die "Can\x27t open $$file: $$!"; \ - while(<$$f>) { \ - if (/^\s*\/\/\s*\@include\s*"([^"]*)"\s*$$/) { \ - $$output .= preprocess(dirname($$file) . "/" . $$1); \ - next; \ - } \ - \ - $$output .= $$_; \ - } \ - close($$f); \ - \ - return $$output; \ - } \ - \ - print preprocess($$ARGV[0]); \ -' - # ===== Targets ===== # Generate the grammar parser parser: - $(PEGJS) --export-var PEG.parser $(PARSER_SRC_FILE) $(PARSER_OUT_FILE) + $(PEGJS) $(PARSER_SRC_FILE) $(PARSER_OUT_FILE) # Build the PEG.js library build: mkdir -p $(LIB_DIR) - $(PREPROCESS) $(PEGJS_SRC_FILE) > $(PEGJS_LIB_FILE) + rm -f $(PEGJS_LIB_FILE) + + # The following code is inspired by CoffeeScript's Cakefile. 
+ + echo '/*' >> $(PEGJS_LIB_FILE) + echo " * PEG.js $(PEGJS_VERSION)" >> $(PEGJS_LIB_FILE) + echo ' *' >> $(PEGJS_LIB_FILE) + echo ' * http://pegjs.majda.cz/' >> $(PEGJS_LIB_FILE) + echo ' *' >> $(PEGJS_LIB_FILE) + echo ' * Copyright (c) 2010-2012 David Majda' >> $(PEGJS_LIB_FILE) + echo ' * Licensed under the MIT license' >> $(PEGJS_LIB_FILE) + echo ' */' >> $(PEGJS_LIB_FILE) + echo 'var PEG = (function(undefined) {' >> $(PEGJS_LIB_FILE) + echo ' var modules = {' >> $(PEGJS_LIB_FILE) + echo ' define: function(name, factory) {' >> $(PEGJS_LIB_FILE) + echo ' var dir = name.replace(/(^|\/)[^/]+$$/, "$$1"),' >> $(PEGJS_LIB_FILE) + echo ' module = { exports: {} };' >> $(PEGJS_LIB_FILE) + echo '' >> $(PEGJS_LIB_FILE) + echo ' function require(path) {' >> $(PEGJS_LIB_FILE) + echo ' var name = dir + path,' >> $(PEGJS_LIB_FILE) + echo ' regexp = /[^\/]+\/\.\.\/|\.\//;' >> $(PEGJS_LIB_FILE) + echo '' >> $(PEGJS_LIB_FILE) + echo " /* Can't use /.../g because we can move backwards in the string. */" >> $(PEGJS_LIB_FILE) + echo ' while (regexp.test(name)) {' >> $(PEGJS_LIB_FILE) + echo ' name = name.replace(regexp, "");' >> $(PEGJS_LIB_FILE) + echo ' }' >> $(PEGJS_LIB_FILE) + echo '' >> $(PEGJS_LIB_FILE) + echo ' return modules[name];' >> $(PEGJS_LIB_FILE) + echo ' }' >> $(PEGJS_LIB_FILE) + echo '' >> $(PEGJS_LIB_FILE) + echo ' factory(module, require);' >> $(PEGJS_LIB_FILE) + echo ' this[name] = module.exports;' >> $(PEGJS_LIB_FILE) + echo ' }' >> $(PEGJS_LIB_FILE) + echo ' };' >> $(PEGJS_LIB_FILE) + echo '' >> $(PEGJS_LIB_FILE) + + for module in $(MODULES); do \ + echo " modules.define(\"$$module\", function(module, require) {" >> $(PEGJS_LIB_FILE); \ + sed -e 's/^/ /' src/$$module.js >> $(PEGJS_LIB_FILE); \ + echo ' });' >> $(PEGJS_LIB_FILE); \ + echo '' >> $(PEGJS_LIB_FILE); \ + done + + echo ' return modules["peg"]' >> $(PEGJS_LIB_FILE) + echo '})();' >> $(PEGJS_LIB_FILE) + echo '' >> $(PEGJS_LIB_FILE) + echo 'if (typeof module !== "undefined") {' >> $(PEGJS_LIB_FILE) + 
echo ' module.exports = PEG;' >> $(PEGJS_LIB_FILE) + echo '}' >> $(PEGJS_LIB_FILE) # Remove built PEG.js library (created by "build") clean: diff --git a/src/compiler.js b/src/compiler.js index 3ba1a01..1fcbb7f 100644 --- a/src/compiler.js +++ b/src/compiler.js @@ -1,4 +1,8 @@ -PEG.compiler = { +var utils = require("./utils"); + +module.exports = { + passes: require("./compiler/passes"), + /* * Names of passes that will get run during the compilation (in the specified * order). @@ -20,7 +24,7 @@ PEG.compiler = { compile: function(ast, options) { var that = this; - each(this.appliedPassNames, function(passName) { + utils.each(this.appliedPassNames, function(passName) { that.passes[passName](ast, options); }); @@ -31,5 +35,3 @@ PEG.compiler = { return result; } }; - -// @include "compiler/passes.js" diff --git a/src/compiler/passes.js b/src/compiler/passes.js index 2e4db35..a4c3723 100644 --- a/src/compiler/passes.js +++ b/src/compiler/passes.js @@ -5,10 +5,10 @@ * or modify it as needed. If the pass encounters a semantic error, it throws * |PEG.GrammarError|. 
*/ -PEG.compiler.passes = {}; - -// @include "passes/report-missing-rules.js" -// @include "passes/report-left-recursion.js" -// @include "passes/remove-proxy-rules.js" -// @include "passes/allocate-registers.js" -// @include "passes/generate-code.js" +module.exports = { + reportMissingRules: require("./passes/report-missing-rules"), + reportLeftRecursion: require("./passes/report-left-recursion"), + removeProxyRules: require("./passes/remove-proxy-rules"), + allocateRegisters: require("./passes/allocate-registers"), + generateCode: require("./passes/generate-code") +}; diff --git a/src/compiler/passes/allocate-registers.js b/src/compiler/passes/allocate-registers.js index 23e74cb..b8a1ee3 100644 --- a/src/compiler/passes/allocate-registers.js +++ b/src/compiler/passes/allocate-registers.js @@ -1,3 +1,5 @@ +var utils = require("../../utils"); + /* * Allocates registers that the generated code for each node will use to store * match results and parse positions. For "action", "semantic_and" and @@ -23,7 +25,7 @@ * point of action/predicate code execution to registers that will contain * the labeled values. */ -PEG.compiler.passes.allocateRegisters = function(ast) { +module.exports = function(ast) { /* * Register allocator that allocates registers from an unlimited * integer-indexed pool. 
It allows allocating and releaseing registers in any @@ -151,10 +153,10 @@ PEG.compiler.passes.allocateRegisters = function(ast) { node.params = vars.buildParams(); } - var compute = buildNodeVisitor({ + var compute = utils.buildNodeVisitor({ grammar: function(node) { - each(node.rules, compute); + utils.each(node.rules, compute); }, rule: @@ -172,7 +174,7 @@ PEG.compiler.passes.allocateRegisters = function(ast) { choice: function(node) { - each(node.alternatives, function(alternative) { + utils.each(node.alternatives, function(alternative) { reuseResult(node, alternative); scoped(function() { compute(alternative); @@ -194,11 +196,11 @@ PEG.compiler.passes.allocateRegisters = function(ast) { sequence: function(node) { savePos(node, function() { - each(node.elements, function(element) { + utils.each(node.elements, function(element) { element.resultIndex = registers.alloc(); compute(element); }); - each(node.elements, function(element) { + utils.each(node.elements, function(element) { registers.release(element.resultIndex); }); }); diff --git a/src/compiler/passes/generate-code.js b/src/compiler/passes/generate-code.js index dad8d37..5fea549 100644 --- a/src/compiler/passes/generate-code.js +++ b/src/compiler/passes/generate-code.js @@ -1,7 +1,9 @@ +var utils = require("../../utils"); + /* Generates the parser code. 
*/ -PEG.compiler.passes.generateCode = function(ast, options) { - options = clone(options) || {}; - defaults(options, { +module.exports = function(ast, options) { + options = utils.clone(options) || {}; + utils.defaults(options, { cache: false, trackLineAndColumn: false, allowedStartRules: [ast.startRule] @@ -751,12 +753,12 @@ PEG.compiler.passes.generateCode = function(ast, options) { })(); function fill(name, vars) { - vars.string = quote; - vars.range = range; - vars.map = map; - vars.pluck = pluck; - vars.keys = keys; - vars.values = values; + vars.string = utils.quote; + vars.range = utils.range; + vars.map = utils.map; + vars.pluck = utils.pluck; + vars.keys = utils.keys; + vars.values = utils.values; vars.emit = emit; vars.options = options; @@ -797,7 +799,7 @@ PEG.compiler.passes.generateCode = function(ast, options) { return function(node) { return fill(name, { node: node }); }; } - var emit = buildNodeVisitor({ + var emit = utils.buildNodeVisitor({ grammar: emitSimple("grammar"), initializer: function(node) { return node.code; }, @@ -878,12 +880,12 @@ PEG.compiler.passes.generateCode = function(ast, options) { if (node.parts.length > 0) { regexp = '/^[' + (node.inverted ? '^' : '') - + map(node.parts, function(part) { + + utils.map(node.parts, function(part) { return part instanceof Array - ? quoteForRegexpClass(part[0]) + ? utils.quoteForRegexpClass(part[0]) + '-' - + quoteForRegexpClass(part[1]) - : quoteForRegexpClass(part); + + utils.quoteForRegexpClass(part[1]) + : utils.quoteForRegexpClass(part); }).join('') + ']/' + (node.ignoreCase ? 'i' : ''); } else { diff --git a/src/compiler/passes/remove-proxy-rules.js b/src/compiler/passes/remove-proxy-rules.js index cc82d34..6c2b4bd 100644 --- a/src/compiler/passes/remove-proxy-rules.js +++ b/src/compiler/passes/remove-proxy-rules.js @@ -1,7 +1,9 @@ +var utils = require("../../utils"); + /* * Removes proxy rules -- that is, rules that only delegate to other rule. 
*/ -PEG.compiler.passes.removeProxyRules = function(ast) { +module.exports = function(ast) { function isProxyRule(node) { return node.type === "rule" && node.expression.type === "rule_ref"; } @@ -15,13 +17,13 @@ PEG.compiler.passes.removeProxyRules = function(ast) { function replaceInSubnodes(propertyName) { return function(node, from, to) { - each(node[propertyName], function(subnode) { + utils.each(node[propertyName], function(subnode) { replace(subnode, from, to); }); }; } - var replace = buildNodeVisitor({ + var replace = utils.buildNodeVisitor({ grammar: replaceInSubnodes("rules"), rule: replaceInExpression, named: replaceInExpression, @@ -54,7 +56,7 @@ PEG.compiler.passes.removeProxyRules = function(ast) { var indices = []; - each(ast.rules, function(rule, i) { + utils.each(ast.rules, function(rule, i) { if (isProxyRule(rule)) { replaceRuleRefs(ast, rule.name, rule.expression.name); if (rule.name === ast.startRule) { @@ -66,7 +68,7 @@ PEG.compiler.passes.removeProxyRules = function(ast) { indices.reverse(); - each(indices, function(index) { + utils.each(indices, function(index) { ast.rules.splice(index, 1); }); }; diff --git a/src/compiler/passes/report-left-recursion.js b/src/compiler/passes/report-left-recursion.js index 60251e8..045a013 100644 --- a/src/compiler/passes/report-left-recursion.js +++ b/src/compiler/passes/report-left-recursion.js @@ -1,5 +1,7 @@ +var utils = require("../../utils"); + /* Checks that no left recursion is present. 
*/ -PEG.compiler.passes.reportLeftRecursion = function(ast) { +module.exports = function(ast) { function nop() {} function checkExpression(node, appliedRules) { @@ -8,13 +10,13 @@ PEG.compiler.passes.reportLeftRecursion = function(ast) { function checkSubnodes(propertyName) { return function(node, appliedRules) { - each(node[propertyName], function(subnode) { + utils.each(node[propertyName], function(subnode) { check(subnode, appliedRules); }); }; } - var check = buildNodeVisitor({ + var check = utils.buildNodeVisitor({ grammar: checkSubnodes("rules"), rule: @@ -44,12 +46,12 @@ PEG.compiler.passes.reportLeftRecursion = function(ast) { rule_ref: function(node, appliedRules) { - if (contains(appliedRules, node.name)) { + if (utils.contains(appliedRules, node.name)) { throw new PEG.GrammarError( "Left recursion detected for rule \"" + node.name + "\"." ); } - check(findRuleByName(ast, node.name), appliedRules); + check(utils.findRuleByName(ast, node.name), appliedRules); }, literal: nop, diff --git a/src/compiler/passes/report-missing-rules.js b/src/compiler/passes/report-missing-rules.js index 0a14877..9289f3a 100644 --- a/src/compiler/passes/report-missing-rules.js +++ b/src/compiler/passes/report-missing-rules.js @@ -1,14 +1,16 @@ +var utils = require("../../utils"); + /* Checks that all referenced rules exist. 
*/ -PEG.compiler.passes.reportMissingRules = function(ast) { +module.exports = function(ast) { function nop() {} function checkExpression(node) { check(node.expression); } function checkSubnodes(propertyName) { - return function(node) { each(node[propertyName], check); }; + return function(node) { utils.each(node[propertyName], check); }; } - var check = buildNodeVisitor({ + var check = utils.buildNodeVisitor({ grammar: checkSubnodes("rules"), rule: checkExpression, named: checkExpression, @@ -26,7 +28,7 @@ PEG.compiler.passes.reportMissingRules = function(ast) { rule_ref: function(node) { - if (!findRuleByName(ast, node.name)) { + if (!utils.findRuleByName(ast, node.name)) { throw new PEG.GrammarError( "Referenced rule \"" + node.name + "\" does not exist." ); diff --git a/src/grammar-error.js b/src/grammar-error.js new file mode 100644 index 0000000..91e3f62 --- /dev/null +++ b/src/grammar-error.js @@ -0,0 +1,9 @@ +var utils = require("./utils"); + +/* Thrown when the grammar contains an error. */ +module.exports = function(message) { + this.name = "GrammarError"; + this.message = message; +}; + +utils.subclass(module.exports, Error); diff --git a/src/parser.js b/src/parser.js index b3d6268..8b47929 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,4 +1,4 @@ -PEG.parser = (function(){ +module.exports = (function(){ /* * Generated by PEG.js 0.7.0. 
* @@ -298,7 +298,7 @@ PEG.parser = (function(){ if (r0 !== null) { r0 = (function(offset, head, tail) { if (tail.length > 0) { - var alternatives = [head].concat(map( + var alternatives = [head].concat(utils.map( tail, function(element) { return element[1]; } )); @@ -1869,10 +1869,10 @@ PEG.parser = (function(){ } if (r0 !== null) { r0 = (function(offset, inverted, parts, flags) { - var partsConverted = map(parts, function(part) { return part.data; }); + var partsConverted = utils.map(parts, function(part) { return part.data; }); var rawText = "[" + inverted - + map(parts, function(part) { return part.rawText; }).join("") + + utils.map(parts, function(part) { return part.rawText; }).join("") + "]" + flags; @@ -1959,7 +1959,7 @@ PEG.parser = (function(){ return { data: char_, // FIXME: Get the raw text from the input directly. - rawText: quoteForRegexpClass(char_) + rawText: utils.quoteForRegexpClass(char_) }; })(r1, r0); } @@ -2796,6 +2796,9 @@ PEG.parser = (function(){ } + var utils = require("./utils"); + + var result = parseFunctions[startRule](); /* diff --git a/src/parser.pegjs b/src/parser.pegjs index fb31398..86b2c43 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -1,3 +1,7 @@ +{ + var utils = require("./utils"); +} + grammar = __ initializer:initializer? rules:rule+ { return { @@ -37,7 +41,7 @@ expression choice = head:sequence tail:(slash sequence)* { if (tail.length > 0) { - var alternatives = [head].concat(map( + var alternatives = [head].concat(utils.map( tail, function(element) { return element[1]; } )); @@ -238,10 +242,10 @@ simpleSingleQuotedCharacter class "character class" = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? 
__ { - var partsConverted = map(parts, function(part) { return part.data; }); + var partsConverted = utils.map(parts, function(part) { return part.data; }); var rawText = "[" + inverted - + map(parts, function(part) { return part.rawText; }).join("") + + utils.map(parts, function(part) { return part.rawText; }).join("") + "]" + flags; @@ -275,7 +279,7 @@ classCharacter return { data: char_, // FIXME: Get the raw text from the input directly. - rawText: quoteForRegexpClass(char_) + rawText: utils.quoteForRegexpClass(char_) }; } diff --git a/src/peg.js b/src/peg.js index 27610cb..ff4e724 100644 --- a/src/peg.js +++ b/src/peg.js @@ -1,19 +1,11 @@ -/* - * PEG.js 0.7.0 - * - * http://pegjs.majda.cz/ - * - * Copyright (c) 2010-2012 David Majda - * Licensed under the MIT license. - */ -var PEG = (function(undefined) { - -// @include "utils.js" - -var PEG = { +module.exports = { /* PEG.js version (uses semantic versioning). */ VERSION: "0.7.0", + GrammarError: require("./grammar-error"), + parser: require("./parser"), + compiler: require("./compiler"), + /* * Generates a parser from a specified grammar and returns it. * @@ -26,26 +18,6 @@ var PEG = { * generated parser and cause its malfunction. */ buildParser: function(grammar, options) { - return PEG.compiler.compile(PEG.parser.parse(grammar), options); + return this.compiler.compile(this.parser.parse(grammar), options); } }; - -/* Thrown when the grammar contains an error. */ - -PEG.GrammarError = function(message) { - this.name = "GrammarError"; - this.message = message; -}; - -subclass(PEG.GrammarError, Error); - -// @include "parser.js" -// @include "compiler.js" - -return PEG; - -})(); - -if (typeof module !== "undefined") { - module.exports = PEG; -} diff --git a/src/utils.js b/src/utils.js index e97a3da..9d50d82 100644 --- a/src/utils.js +++ b/src/utils.js @@ -1,209 +1,214 @@ -/* Like Python's |range|, but without |step|. 
*/ -function range(start, stop) { - if (stop === undefined) { - stop = start; - start = 0; - } +var utils = { + /* Like Python's |range|, but without |step|. */ + range: function(start, stop) { + if (stop === undefined) { + stop = start; + start = 0; + } - var result = new Array(Math.max(0, stop - start)); - for (var i = 0, j = start; j < stop; i++, j++) { - result[i] = j; - } - return result; -} - -function find(array, callback) { - var length = array.length; - for (var i = 0; i < length; i++) { - if (callback(array[i])) { - return array[i]; + var result = new Array(Math.max(0, stop - start)); + for (var i = 0, j = start; j < stop; i++, j++) { + result[i] = j; } - } -} + return result; + }, + + find: function(array, callback) { + var length = array.length; + for (var i = 0; i < length; i++) { + if (callback(array[i])) { + return array[i]; + } + } + }, + + contains: function(array, value) { + /* + * Stupid IE does not have Array.prototype.indexOf, otherwise this function + * would be a one-liner. + */ + var length = array.length; + for (var i = 0; i < length; i++) { + if (array[i] === value) { + return true; + } + } + return false; + }, -function contains(array, value) { - /* - * Stupid IE does not have Array.prototype.indexOf, otherwise this function - * would be a one-liner. 
- */ - var length = array.length; - for (var i = 0; i < length; i++) { - if (array[i] === value) { - return true; + each: function(array, callback) { + var length = array.length; + for (var i = 0; i < length; i++) { + callback(array[i], i); } - } - return false; -} + }, -function each(array, callback) { - var length = array.length; - for (var i = 0; i < length; i++) { - callback(array[i], i); - } -} + map: function(array, callback) { + var result = []; + var length = array.length; + for (var i = 0; i < length; i++) { + result[i] = callback(array[i], i); + } + return result; + }, -function map(array, callback) { - var result = []; - var length = array.length; - for (var i = 0; i < length; i++) { - result[i] = callback(array[i], i); - } - return result; -} + pluck: function(array, key) { + return utils.map(array, function (e) { return e[key]; }); + }, -function pluck(array, key) { - return map(array, function (e) { return e[key]; }); -} + keys: function(object) { + var result = []; + for (var key in object) { + result.push(key); + } + return result; + }, -function keys(object) { - var result = []; - for (var key in object) { - result.push(key); - } - return result; -} + values: function(object) { + var result = []; + for (var key in object) { + result.push(object[key]); + } + return result; + }, -function values(object) { - var result = []; - for (var key in object) { - result.push(object[key]); - } - return result; -} + clone: function(object) { + var result = {}; + for (var key in object) { + result[key] = object[key]; + } + return result; + }, + + defaults: function(object, defaults) { + for (var key in defaults) { + if (object[key] === undefined) { + object[key] = defaults[key]; + } + } + }, -function clone(object) { - var result = {}; - for (var key in object) { - result[key] = object[key]; - } - return result; -} + /* + * The code needs to be in sync with the code template in the compilation + * function for "action" nodes. 
+ */ + subclass: function(child, parent) { + function ctor() { this.constructor = child; } + ctor.prototype = parent.prototype; + child.prototype = new ctor(); + }, + + /* + * Returns a string padded on the left to a desired length with a character. + * + * The code needs to be in sync with the code template in the compilation + * function for "action" nodes. + */ + padLeft: function(input, padding, length) { + var result = input; -function defaults(object, defaults) { - for (var key in defaults) { - if (object[key] === undefined) { - object[key] = defaults[key]; + var padLength = length - input.length; + for (var i = 0; i < padLength; i++) { + result = padding + result; } - } -} - -/* - * The code needs to be in sync with the code template in the compilation - * function for "action" nodes. - */ -function subclass(child, parent) { - function ctor() { this.constructor = child; } - ctor.prototype = parent.prototype; - child.prototype = new ctor(); -} - -/* - * Returns a string padded on the left to a desired length with a character. - * - * The code needs to be in sync with the code template in the compilation - * function for "action" nodes. - */ -function padLeft(input, padding, length) { - var result = input; - - var padLength = length - input.length; - for (var i = 0; i < padLength; i++) { - result = padding + result; - } - return result; -} - -/* - * Returns an escape sequence for given character. Uses \x for characters <= - * 0xFF to save space, \u for the rest. - * - * The code needs to be in sync with the code template in the compilation - * function for "action" nodes. 
- */ -function escape(ch) { - var charCode = ch.charCodeAt(0); - var escapeChar; - var length; - - if (charCode <= 0xFF) { - escapeChar = 'x'; - length = 2; - } else { - escapeChar = 'u'; - length = 4; - } + return result; + }, - return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length); -} - -/* - * Surrounds the string with quotes and escapes characters inside so that the - * result is a valid JavaScript string. - * - * The code needs to be in sync with the code template in the compilation - * function for "action" nodes. - */ -function quote(s) { /* - * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string - * literal except for the closing quote character, backslash, carriage return, - * line separator, paragraph separator, and line feed. Any character may - * appear in the form of an escape sequence. + * Returns an escape sequence for given character. Uses \x for characters <= + * 0xFF to save space, \u for the rest. * - * For portability, we also escape escape all control and non-ASCII - * characters. Note that "\0" and "\v" escape sequences are not used because - * JSHint does not like the first and IE the second. + * The code needs to be in sync with the code template in the compilation + * function for "action" nodes. */ - return '"' + s - .replace(/\\/g, '\\\\') // backslash - .replace(/"/g, '\\"') // closing quote character - .replace(/\x08/g, '\\b') // backspace - .replace(/\t/g, '\\t') // horizontal tab - .replace(/\n/g, '\\n') // line feed - .replace(/\f/g, '\\f') // form feed - .replace(/\r/g, '\\r') // carriage return - .replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, escape) - + '"'; -} - -/* - * Escapes characters inside the string so that it can be used as a list of - * characters in a character class of a regular expression. 
- */ -function quoteForRegexpClass(s) { + escape: function(ch) { + var charCode = ch.charCodeAt(0); + var escapeChar; + var length; + + if (charCode <= 0xFF) { + escapeChar = 'x'; + length = 2; + } else { + escapeChar = 'u'; + length = 4; + } + + return '\\' + escapeChar + utils.padLeft(charCode.toString(16).toUpperCase(), '0', length); + }, + /* - * Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. + * Surrounds the string with quotes and escapes characters inside so that the + * result is a valid JavaScript string. * - * For portability, we also escape escape all control and non-ASCII - * characters. + * The code needs to be in sync with the code template in the compilation + * function for "action" nodes. */ - return s - .replace(/\\/g, '\\\\') // backslash - .replace(/\//g, '\\/') // closing slash - .replace(/\]/g, '\\]') // closing bracket - .replace(/-/g, '\\-') // dash - .replace(/\0/g, '\\0') // null - .replace(/\t/g, '\\t') // horizontal tab - .replace(/\n/g, '\\n') // line feed - .replace(/\v/g, '\\x0B') // vertical tab - .replace(/\f/g, '\\f') // form feed - .replace(/\r/g, '\\r') // carriage return - .replace(/[\x01-\x08\x0E-\x1F\x80-\uFFFF]/g, escape); -} - -/* - * Builds a node visitor -- a function which takes a node and any number of - * other parameters, calls an appropriate function according to the node type, - * passes it all its parameters and returns its value. The functions for various - * node types are passed in a parameter to |buildNodeVisitor| as a hash. - */ -function buildNodeVisitor(functions) { - return function(node) { - return functions[node.type].apply(null, arguments); - }; -} - -function findRuleByName(ast, name) { - return find(ast.rules, function(r) { return r.name === name; }); -} + quote: function(s) { + /* + * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string + * literal except for the closing quote character, backslash, carriage + * return, line separator, paragraph separator, and line feed. 
Any character + * may appear in the form of an escape sequence. + * + * For portability, we also escape all control and non-ASCII + * characters. Note that "\0" and "\v" escape sequences are not used because + * JSHint does not like the first and IE the second. + */ + return '"' + s + .replace(/\\/g, '\\\\') // backslash + .replace(/"/g, '\\"') // closing quote character + .replace(/\x08/g, '\\b') // backspace + .replace(/\t/g, '\\t') // horizontal tab + .replace(/\n/g, '\\n') // line feed + .replace(/\f/g, '\\f') // form feed + .replace(/\r/g, '\\r') // carriage return + .replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, utils.escape) + + '"'; + }, + + /* + * Escapes characters inside the string so that it can be used as a list of + * characters in a character class of a regular expression. + */ + quoteForRegexpClass: function(s) { + /* + * Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. + * + * For portability, we also escape all control and non-ASCII + * characters. + */ + return s + .replace(/\\/g, '\\\\') // backslash + .replace(/\//g, '\\/') // closing slash + .replace(/\]/g, '\\]') // closing bracket + .replace(/-/g, '\\-') // dash + .replace(/\0/g, '\\0') // null + .replace(/\t/g, '\\t') // horizontal tab + .replace(/\n/g, '\\n') // line feed + .replace(/\v/g, '\\x0B') // vertical tab + .replace(/\f/g, '\\f') // form feed + .replace(/\r/g, '\\r') // carriage return + .replace(/[\x01-\x08\x0E-\x1F\x80-\uFFFF]/g, utils.escape); + }, + + /* + * Builds a node visitor -- a function which takes a node and any number of + * other parameters, calls an appropriate function according to the node type, + * passes it all its parameters and returns its value. The functions for + * various node types are passed in a parameter to |buildNodeVisitor| as a + * hash. 
+ */ + buildNodeVisitor: function(functions) { + return function(node) { + return functions[node.type].apply(null, arguments); + }; + }, + + findRuleByName: function(ast, name) { + return utils.find(ast.rules, function(r) { return r.name === name; }); + } +}; + +module.exports = utils;