Make sure quoting functions output only ASCII characters
This patch prevents portability problems. In particular, it fixes a problem where the "SyntaxError: Invalid range in character class." error appeared when using the command-line version on Windows (see GH-13).
This commit is contained in:
parent
4d68812b65
commit
aeb2cb4f1c
|
@ -119,6 +119,33 @@ PEG.compiler.emitter = function(ast) {
|
|||
" var rightmostMatchFailuresExpected = [];",
|
||||
" var cache = {};",
|
||||
" ",
|
||||
/* This needs to be in sync with |padLeft| in utils.js. */
|
||||
" function padLeft(input, padding, length) {",
|
||||
" var result = input;",
|
||||
" ",
|
||||
" var padLength = length - input.length;",
|
||||
" for (var i = 0; i < padLength; i++) {",
|
||||
" result = padding + result;",
|
||||
" }",
|
||||
" ",
|
||||
" return result;",
|
||||
" }",
|
||||
" ",
|
||||
/* This needs to be in sync with |escape| in utils.js. */
|
||||
" function escape(ch) {",
|
||||
" var charCode = ch.charCodeAt(0);",
|
||||
" ",
|
||||
" if (charCode < 0xFF) {",
|
||||
" var escapeChar = 'x';",
|
||||
" var length = 2;",
|
||||
" } else {",
|
||||
" var escapeChar = 'u';",
|
||||
" var length = 4;",
|
||||
" }",
|
||||
" ",
|
||||
" return '\\\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);",
|
||||
" }",
|
||||
" ",
|
||||
/* This needs to be in sync with |quote| in utils.js. */
|
||||
" function quote(s) {",
|
||||
" /*",
|
||||
|
@ -128,12 +155,11 @@ PEG.compiler.emitter = function(ast) {
|
|||
" * Any character may appear in the form of an escape sequence.",
|
||||
" */",
|
||||
" return '\"' + s",
|
||||
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
|
||||
" .replace(/\"/g, '\\\\\"') // closing quote character",
|
||||
" .replace(/\\r/g, '\\\\r') // carriage return",
|
||||
" .replace(/\\u2028/g, '\\\\u2028') // line separator",
|
||||
" .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
|
||||
" .replace(/\\n/g, '\\\\n') // line feed",
|
||||
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
|
||||
" .replace(/\"/g, '\\\\\"') // closing quote character",
|
||||
" .replace(/\\r/g, '\\\\r') // carriage return",
|
||||
" .replace(/\\n/g, '\\\\n') // line feed",
|
||||
" .replace(/[\\x80-\\uFFFF]/g, escape) // non-ASCII characters",
|
||||
" + '\"';",
|
||||
" }",
|
||||
" ",
|
||||
|
|
|
@ -15,6 +15,31 @@ PEG.parser = (function(){
|
|||
var rightmostMatchFailuresExpected = [];
|
||||
var cache = {};
|
||||
|
||||
/*
 * Returns |input| with |padding| prepended as many times as needed for the
 * result to reach |length| characters. Inputs that are already long enough
 * are returned untouched.
 */
function padLeft(input, padding, length) {
  var padded = input;
  var remaining = length - input.length;

  while (remaining > 0) {
    padded = padding + padded;
    remaining--;
  }

  return padded;
}
|
||||
|
||||
/*
 * Returns an escape sequence for the first character of |ch|. Characters up
 * to and including 0xFF use the two-digit \x form; everything else uses the
 * four-digit \u form.
 */
function escape(ch) {
  var charCode = ch.charCodeAt(0);
  var escapeChar;
  var length;

  /* 0xFF still fits into two hex digits, so it belongs to the \x branch. */
  if (charCode <= 0xFF) {
    escapeChar = 'x';
    length = 2;
  } else {
    escapeChar = 'u';
    length = 4;
  }

  return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
}
|
||||
|
||||
function quote(s) {
|
||||
/*
|
||||
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
|
||||
|
@ -23,12 +48,11 @@ PEG.parser = (function(){
|
|||
* Any character may appear in the form of an escape sequence.
|
||||
*/
|
||||
return '"' + s
|
||||
.replace(/\\/g, '\\\\') // backslash
|
||||
.replace(/"/g, '\\"') // closing quote character
|
||||
.replace(/\r/g, '\\r') // carriage return
|
||||
.replace(/\u2028/g, '\\u2028') // line separator
|
||||
.replace(/\u2029/g, '\\u2029') // paragraph separator
|
||||
.replace(/\n/g, '\\n') // line feed
|
||||
.replace(/\\/g, '\\\\') // backslash
|
||||
.replace(/"/g, '\\"') // closing quote character
|
||||
.replace(/\r/g, '\\r') // carriage return
|
||||
.replace(/\n/g, '\\n') // line feed
|
||||
.replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
|
||||
+ '"';
|
||||
}
|
||||
|
||||
|
@ -3404,13 +3428,13 @@ PEG.parser = (function(){
|
|||
|
||||
var savedReportMatchFailures = reportMatchFailures;
|
||||
reportMatchFailures = false;
|
||||
if (input.substr(pos).match(/^[ - ]/) !== null) {
|
||||
if (input.substr(pos).match(/^[ \xA0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]/) !== null) {
|
||||
var result0 = input.charAt(pos);
|
||||
pos++;
|
||||
} else {
|
||||
var result0 = null;
|
||||
if (reportMatchFailures) {
|
||||
matchFailed("[ - ]");
|
||||
matchFailed("[ \\xA0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]");
|
||||
}
|
||||
}
|
||||
reportMatchFailures = savedReportMatchFailures;
|
||||
|
|
74
src/utils.js
74
src/utils.js
|
@ -34,6 +34,44 @@ function map(array, callback) {
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
 * Returns a string padded on the left to a desired length with a character.
 * Inputs that are already at least |length| characters long are returned
 * unchanged.
 *
 * The code needs to be in sync with the code template in the compilation
 * function for "action" nodes.
 */
function padLeft(input, padding, length) {
  var padded = input;

  for (var missing = length - input.length; missing > 0; missing--) {
    padded = padding + padded;
  }

  return padded;
}
|
||||
|
||||
/*
 * Returns an escape sequence for given character. Uses \x for characters <=
 * 0xFF to save space, \u for the rest.
 *
 * The code needs to be in sync with the code template in the compilation
 * function for "action" nodes.
 */
function escape(ch) {
  var charCode = ch.charCodeAt(0);
  var escapeChar;
  var length;

  /* 0xFF still fits into two hex digits, so it belongs to the \x branch. */
  if (charCode <= 0xFF) {
    escapeChar = 'x';
    length = 2;
  } else {
    escapeChar = 'u';
    length = 4;
  }

  return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
}
|
||||
|
||||
/*
|
||||
* Surrounds the string with quotes and escapes characters inside so that the
|
||||
* result is a valid JavaScript string.
|
||||
|
@ -47,14 +85,15 @@ function quote(s) {
|
|||
* literal except for the closing quote character, backslash, carriage return,
|
||||
* line separator, paragraph separator, and line feed. Any character may
|
||||
* appear in the form of an escape sequence.
|
||||
*
|
||||
* For portability, we also escape all non-ASCII characters.
|
||||
*/
|
||||
return '"' + s
|
||||
.replace(/\\/g, '\\\\') // backslash
|
||||
.replace(/"/g, '\\"') // closing quote character
|
||||
.replace(/\r/g, '\\r') // carriage return
|
||||
.replace(/\u2028/g, '\\u2028') // line separator
|
||||
.replace(/\u2029/g, '\\u2029') // paragraph separator
|
||||
.replace(/\n/g, '\\n') // line feed
|
||||
.replace(/\\/g, '\\\\') // backslash
|
||||
.replace(/"/g, '\\"') // closing quote character
|
||||
.replace(/\r/g, '\\r') // carriage return
|
||||
.replace(/\n/g, '\\n') // line feed
|
||||
.replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
|
||||
+ '"';
|
||||
};
|
||||
|
||||
|
@ -63,17 +102,20 @@ function quote(s) {
|
|||
* characters in a character class of a regular expression.
|
||||
*/
|
||||
function quoteForRegexpClass(s) {
  /*
   * Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
   *
   * For portability, we also escape all non-ASCII characters.
   *
   * The backslash replacement has to run first; otherwise it would double the
   * backslashes introduced by the replacements that follow it.
   */
  return s
    .replace(/\\/g, '\\\\')             // backslash
    .replace(/\0/g, '\\0')              // null, IE needs this
    .replace(/\//g, '\\/')              // closing slash
    .replace(/]/g, '\\]')               // closing bracket
    .replace(/\^/g, '\\^')              // caret, would negate the class if first
    .replace(/-/g, '\\-')               // dash
    .replace(/\r/g, '\\r')              // carriage return
    .replace(/\n/g, '\\n')              // line feed
    .replace(/[\x80-\uFFFF]/g, escape); // non-ASCII characters
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -435,7 +435,7 @@ test("parses bracketDelimitedCharacter", function() {
|
|||
parserParses("start = [\\n]", classGrammar(false, ["\n"], "[\\n]"));
|
||||
parserParses("start = [\\0]", classGrammar(false, ["\0"], "[\\0]"));
|
||||
parserParses("start = [\\x00]", classGrammar(false, ["\0"], "[\\0]"));
|
||||
parserParses("start = [\\u0120]", classGrammar(false, ["\u0120"], "[\u0120]"));
|
||||
parserParses("start = [\\u0120]", classGrammar(false, ["\u0120"], "[\\u0120]"));
|
||||
parserParses("start = [\\\n]", classGrammar(false, ["\n"], "[\\n]"));
|
||||
});
|
||||
|
||||
|
|
Loading…
Reference in a new issue