Make sure quoting functions output only ASCII characters

This patch prevents portability problems. In particular, it fixes a
problem where a "SyntaxError: Invalid range in character class." error
appeared when using the command-line version on Windows (see GH-13).
This commit is contained in:
David Majda 2010-11-20 16:58:47 +01:00
parent 4d68812b65
commit aeb2cb4f1c
4 changed files with 123 additions and 31 deletions

View file

@ -119,6 +119,33 @@ PEG.compiler.emitter = function(ast) {
" var rightmostMatchFailuresExpected = [];",
" var cache = {};",
" ",
/* This needs to be in sync with |padLeft| in utils.js. */
" function padLeft(input, padding, length) {",
" var result = input;",
" ",
" var padLength = length - input.length;",
" for (var i = 0; i < padLength; i++) {",
" result = padding + result;",
" }",
" ",
" return result;",
" }",
" ",
/* This needs to be in sync with |escape| in utils.js. */
" function escape(ch) {",
" var charCode = ch.charCodeAt(0);",
" ",
" if (charCode < 0xFF) {",
" var escapeChar = 'x';",
" var length = 2;",
" } else {",
" var escapeChar = 'u';",
" var length = 4;",
" }",
" ",
" return '\\\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);",
" }",
" ",
/* This needs to be in sync with |quote| in utils.js. */
" function quote(s) {",
" /*",
@ -128,12 +155,11 @@ PEG.compiler.emitter = function(ast) {
" * Any character may appear in the form of an escape sequence.",
" */",
" return '\"' + s",
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
" .replace(/\"/g, '\\\\\"') // closing quote character",
" .replace(/\\r/g, '\\\\r') // carriage return",
" .replace(/\\u2028/g, '\\\\u2028') // line separator",
" .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
" .replace(/\\n/g, '\\\\n') // line feed",
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
" .replace(/\"/g, '\\\\\"') // closing quote character",
" .replace(/\\r/g, '\\\\r') // carriage return",
" .replace(/\\n/g, '\\\\n') // line feed",
" .replace(/[\\x80-\\uFFFF]/g, escape) // non-ASCII characters",
" + '\"';",
" }",
" ",

View file

@ -15,6 +15,31 @@ PEG.parser = (function(){
var rightmostMatchFailuresExpected = [];
var cache = {};
/*
 * Prepends |padding| to |input| once for every character that |input| is
 * short of |length|. Input that is already at least |length| characters long
 * is returned unchanged.
 */
function padLeft(input, padding, length) {
  var padded = input;
  var remaining = length - input.length;

  /* Count down the number of missing characters, adding one pad per step. */
  while (remaining > 0) {
    padded = padding + padded;
    remaining -= 1;
  }

  return padded;
}
/*
 * Returns an escape sequence for the given character: a two-digit \x sequence
 * for character codes up to and including 0xFF, a four-digit \u sequence for
 * everything above. Only the first UTF-16 code unit of |ch| is examined.
 */
function escape(ch) {
  var charCode = ch.charCodeAt(0);
  var escapeChar;
  var length;

  /* 0xFF itself still fits into two hex digits, so it takes the \x form. */
  if (charCode <= 0xFF) {
    escapeChar = 'x';
    length = 2;
  } else {
    escapeChar = 'u';
    length = 4;
  }

  return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
}
function quote(s) {
/*
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
@ -23,12 +48,11 @@ PEG.parser = (function(){
* Any character may appear in the form of an escape sequence.
*/
return '"' + s
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // closing quote character
.replace(/\r/g, '\\r') // carriage return
.replace(/\u2028/g, '\\u2028') // line separator
.replace(/\u2029/g, '\\u2029') // paragraph separator
.replace(/\n/g, '\\n') // line feed
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // closing quote character
.replace(/\r/g, '\\r') // carriage return
.replace(/\n/g, '\\n') // line feed
.replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
+ '"';
}
@ -3404,13 +3428,13 @@ PEG.parser = (function(){
var savedReportMatchFailures = reportMatchFailures;
reportMatchFailures = false;
if (input.substr(pos).match(/^[   - ]/) !== null) {
if (input.substr(pos).match(/^[ \xA0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]/) !== null) {
var result0 = input.charAt(pos);
pos++;
} else {
var result0 = null;
if (reportMatchFailures) {
matchFailed("[   - ]");
matchFailed("[ \\xA0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]");
}
}
reportMatchFailures = savedReportMatchFailures;

View file

@ -34,6 +34,44 @@ function map(array, callback) {
return result;
}
/*
 * Returns a string padded on the left to a desired length with a character.
 *
 * |padding| is prepended to |input| once for every character that |input| is
 * short of |length|; input that is already long enough is returned unchanged.
 *
 * The code needs to be in sync with the code template in the compilation
 * function for "action" nodes.
 */
function padLeft(input, padding, length) {
  var padded = input;
  var remaining = length - input.length;

  /* Count down the number of missing characters, adding one pad per step. */
  while (remaining > 0) {
    padded = padding + padded;
    remaining -= 1;
  }

  return padded;
}
/*
 * Returns an escape sequence for given character. Uses \x for characters <=
 * 0xFF to save space, \u for the rest.
 *
 * Only the first UTF-16 code unit of |ch| is examined.
 *
 * The code needs to be in sync with the code template in the compilation
 * function for "action" nodes.
 */
function escape(ch) {
  var charCode = ch.charCodeAt(0);
  var escapeChar;
  var length;

  /* 0xFF itself still fits into two hex digits, so it takes the \x form. */
  if (charCode <= 0xFF) {
    escapeChar = 'x';
    length = 2;
  } else {
    escapeChar = 'u';
    length = 4;
  }

  return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
}
/*
* Surrounds the string with quotes and escapes characters inside so that the
* result is a valid JavaScript string.
@ -47,14 +85,15 @@ function quote(s) {
* literal except for the closing quote character, backslash, carriage return,
* line separator, paragraph separator, and line feed. Any character may
* appear in the form of an escape sequence.
*
* For portability, we also escape all non-ASCII characters.
*/
return '"' + s
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // closing quote character
.replace(/\r/g, '\\r') // carriage return
.replace(/\u2028/g, '\\u2028') // line separator
.replace(/\u2029/g, '\\u2029') // paragraph separator
.replace(/\n/g, '\\n') // line feed
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // closing quote character
.replace(/\r/g, '\\r') // carriage return
.replace(/\n/g, '\\n') // line feed
.replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
+ '"';
};
@ -63,17 +102,20 @@ function quote(s) {
* characters in a character class of a regular expression.
*/
function quoteForRegexpClass(s) {
/* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
/*
* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
*
* For portability, we also escape all non-ASCII characters.
*/
return s
.replace(/\\/g, '\\\\') // backslash
.replace(/\0/g, '\\0') // null, IE needs this
.replace(/\//g, '\\/') // closing slash
.replace(/]/g, '\\]') // closing bracket
.replace(/-/g, '\\-') // dash
.replace(/\r/g, '\\r') // carriage return
.replace(/\u2028/g, '\\u2028') // line separator
.replace(/\u2029/g, '\\u2029') // paragraph separator
.replace(/\n/g, '\\n') // line feed
.replace(/\\/g, '\\\\') // backslash
.replace(/\0/g, '\\0') // null, IE needs this
.replace(/\//g, '\\/') // closing slash
.replace(/]/g, '\\]') // closing bracket
.replace(/-/g, '\\-') // dash
.replace(/\r/g, '\\r') // carriage return
.replace(/\n/g, '\\n') // line feed
.replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
}
/*

View file

@ -435,7 +435,7 @@ test("parses bracketDelimitedCharacter", function() {
parserParses("start = [\\n]", classGrammar(false, ["\n"], "[\\n]"));
parserParses("start = [\\0]", classGrammar(false, ["\0"], "[\\0]"));
parserParses("start = [\\x00]", classGrammar(false, ["\0"], "[\\0]"));
parserParses("start = [\\u0120]", classGrammar(false, ["\u0120"], "[\u0120]"));
parserParses("start = [\\u0120]", classGrammar(false, ["\u0120"], "[\\u0120]"));
parserParses("start = [\\\n]", classGrammar(false, ["\n"], "[\\n]"));
});