Replace js.quote / js.quoteForRegexpClass with js.stringEscape /
js.regexpClassEscape.

The new helpers only escape; they no longer add the surrounding quotes, so
every caller in generate-bytecode.js and generate-javascript.js now wraps the
result in double quotes itself. The padLeft/escape helpers are removed in
favour of a module-level hex() and one replace() per escape width
(\x0H, \xHH, \u0HHH, \uHHHH).

stringEscape escapes every code unit in U+0080..U+FFFF (ranges \x80-\xFF,
\u0100-\u0FFF, \u1000-\uFFFF), matching what the removed quote() escaped and
the ranges regexpClassEscape uses.

NOTE(review): intra-line whitespace of this patch was reconstructed; if the
context lines do not match the tree, apply with --ignore-whitespace. The
post-image blob id on the first index line predates the range fix.

diff --git a/lib/compiler/javascript.js b/lib/compiler/javascript.js
index e573358..07948ae 100644
--- a/lib/compiler/javascript.js
+++ b/lib/compiler/javascript.js
@@ -1,53 +1,8 @@
+function hex(ch) { return ch.charCodeAt(0).toString(16).toUpperCase(); }
+
 /* JavaScript code generation helpers. */
 var javascript = {
-  /*
-   * Returns a string padded on the left to a desired length with a character.
-   *
-   * The code needs to be in sync with the code template in the compilation
-   * function for "action" nodes.
-   */
-  padLeft: function(input, padding, length) {
-    var result = input;
-
-    var padLength = length - input.length;
-    for (var i = 0; i < padLength; i++) {
-      result = padding + result;
-    }
-
-    return result;
-  },
-
-  /*
-   * Returns an escape sequence for given character. Uses \x for characters <=
-   * 0xFF to save space, \u for the rest.
-   *
-   * The code needs to be in sync with the code template in the compilation
-   * function for "action" nodes.
-   */
-  escape: function(ch) {
-    var charCode = ch.charCodeAt(0);
-    var escapeChar;
-    var length;
-
-    if (charCode <= 0xFF) {
-      escapeChar = 'x';
-      length = 2;
-    } else {
-      escapeChar = 'u';
-      length = 4;
-    }
-
-    return '\\' + escapeChar + javascript.padLeft(charCode.toString(16).toUpperCase(), '0', length);
-  },
-
-  /*
-   * Surrounds the string with quotes and escapes characters inside so that the
-   * result is a valid JavaScript string.
-   *
-   * The code needs to be in sync with the code template in the compilation
-   * function for "action" nodes.
-   */
-  quote: function(s) {
+  stringEscape: function(s) {
     /*
      * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
      * literal except for the closing quote character, backslash, carriage
@@ -58,23 +13,21 @@ var javascript = {
      * Note that "\0" and "\v" escape sequences are not used because JSHint does
      * not like the first and IE the second.
      */
-    return '"' + s
-      .replace(/\\/g, '\\\\') // backslash
-      .replace(/"/g, '\\"') // closing quote character
-      .replace(/\x08/g, '\\b') // backspace
-      .replace(/\t/g, '\\t') // horizontal tab
-      .replace(/\n/g, '\\n') // line feed
-      .replace(/\f/g, '\\f') // form feed
-      .replace(/\r/g, '\\r') // carriage return
-      .replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, javascript.escape)
-      + '"';
+    return s
+      .replace(/\\/g, '\\\\') // backslash
+      .replace(/"/g, '\\"') // closing double quote
+      .replace(/\x08/g, '\\b') // backspace
+      .replace(/\t/g, '\\t') // horizontal tab
+      .replace(/\n/g, '\\n') // line feed
+      .replace(/\f/g, '\\f') // form feed
+      .replace(/\r/g, '\\r') // carriage return
+      .replace(/[\x00-\x07\x0B\x0E\x0F]/g, function(ch) { return '\\x0' + hex(ch); })
+      .replace(/[\x10-\x1F\x80-\xFF]/g, function(ch) { return '\\x' + hex(ch); })
+      .replace(/[\u0100-\u0FFF]/g, function(ch) { return '\\u0' + hex(ch); })
+      .replace(/[\u1000-\uFFFF]/g, function(ch) { return '\\u' + hex(ch); });
   },
 
-  /*
-   * Escapes characters inside the string so that it can be used as a list of
-   * characters in a character class of a regular expression.
-   */
-  quoteForRegexpClass: function(s) {
+  regexpClassEscape: function(s) {
     /*
      * Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
      *
@@ -92,7 +45,10 @@ var javascript = {
       .replace(/\v/g, '\\x0B') // vertical tab
       .replace(/\f/g, '\\f') // form feed
       .replace(/\r/g, '\\r') // carriage return
-      .replace(/[\x01-\x08\x0E-\x1F\x80-\uFFFF]/g, javascript.escape);
+      .replace(/[\x00-\x08\x0E\x0F]/g, function(ch) { return '\\x0' + hex(ch); })
+      .replace(/[\x10-\x1F\x80-\xFF]/g, function(ch) { return '\\x' + hex(ch); })
+      .replace(/[\u0100-\u0FFF]/g, function(ch) { return '\\u0' + hex(ch); })
+      .replace(/[\u1000-\uFFFF]/g, function(ch) { return '\\u' + hex(ch); });
   }
 };
 
diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js
index 7588424..4d3e84f 100644
--- a/lib/compiler/passes/generate-bytecode.js
+++ b/lib/compiler/passes/generate-bytecode.js
@@ -285,7 +285,7 @@ function generateBytecode(ast) {
 
     named: function(node, context) {
       var nameIndex = addConst(
-        '{ type: "other", description: ' + js.quote(node.name) + ' }'
+        '{ type: "other", description: "' + js.stringEscape(node.name) + '" }'
       );
 
       /*
@@ -516,15 +516,19 @@ function generateBytecode(ast) {
       var stringIndex, expectedIndex;
 
       if (node.value.length > 0) {
-        stringIndex = addConst(node.ignoreCase
-          ? js.quote(node.value.toLowerCase())
-          : js.quote(node.value)
+        stringIndex = addConst('"'
+          + js.stringEscape(
+              node.ignoreCase ? node.value.toLowerCase() : node.value
+            )
+          + '"'
         );
         expectedIndex = addConst([
           '{',
           'type: "literal",',
-          'value: ' + js.quote(node.value) + ',',
-          'description: ' + js.quote(js.quote(node.value)),
+          'value: "' + js.stringEscape(node.value) + '",',
+          'description: "'
+             + js.stringEscape('"' + js.stringEscape(node.value) + '"')
+             + '"',
           '}'
         ].join(' '));
 
@@ -557,10 +561,10 @@ function generateBytecode(ast) {
           + (node.inverted ? '^' : '')
           + arrays.map(node.parts, function(part) {
               return part instanceof Array
-                ? js.quoteForRegexpClass(part[0])
-                  + '-'
-                  + js.quoteForRegexpClass(part[1])
-                : js.quoteForRegexpClass(part);
+                ? js.regexpClassEscape(part[0])
+                  + '-'
+                  + js.regexpClassEscape(part[1])
+                : js.regexpClassEscape(part);
             }).join('')
           + ']/' + (node.ignoreCase ? 'i' : '');
       } else {
@@ -575,8 +579,8 @@ function generateBytecode(ast) {
       expectedIndex = addConst([
         '{',
         'type: "class",',
-        'value: ' + js.quote(node.rawText) + ',',
-        'description: ' + js.quote(node.rawText),
+        'value: "' + js.stringEscape(node.rawText) + '",',
+        'description: "' + js.stringEscape(node.rawText) + '"',
         '}'
       ].join(' '));
 
diff --git a/lib/compiler/passes/generate-javascript.js b/lib/compiler/passes/generate-javascript.js
index 8da5d9a..6e0f894 100644
--- a/lib/compiler/passes/generate-javascript.js
+++ b/lib/compiler/passes/generate-javascript.js
@@ -22,12 +22,12 @@ function generateJavaScript(ast, options) {
         indent2(arrays.map(
           ast.rules,
           function(rule) {
-            return 'peg$decode('
-              + js.quote(arrays.map(
+            return 'peg$decode("'
+              + js.stringEscape(arrays.map(
                   rule.bytecode,
                   function(b) { return String.fromCharCode(b + 32); }
                 ).join(''))
-              + ')';
+              + '")';
           }
         ).join(',\n')),
         '],'