Make sure quoting functions output only ASCII characters
This patch prevents portability problems. In particular, it fixes a problem where a "SyntaxError: Invalid range in character class." error appeared when using the command-line version on Windows (see GH-13).
This commit is contained in:
parent
4d68812b65
commit
aeb2cb4f1c
|
@ -119,6 +119,33 @@ PEG.compiler.emitter = function(ast) {
|
||||||
" var rightmostMatchFailuresExpected = [];",
|
" var rightmostMatchFailuresExpected = [];",
|
||||||
" var cache = {};",
|
" var cache = {};",
|
||||||
" ",
|
" ",
|
||||||
|
/* This needs to be in sync with |padLeft| in utils.js. */
|
||||||
|
" function padLeft(input, padding, length) {",
|
||||||
|
" var result = input;",
|
||||||
|
" ",
|
||||||
|
" var padLength = length - input.length;",
|
||||||
|
" for (var i = 0; i < padLength; i++) {",
|
||||||
|
" result = padding + result;",
|
||||||
|
" }",
|
||||||
|
" ",
|
||||||
|
" return result;",
|
||||||
|
" }",
|
||||||
|
" ",
|
||||||
|
/* This needs to be in sync with |escape| in utils.js. */
|
||||||
|
" function escape(ch) {",
|
||||||
|
" var charCode = ch.charCodeAt(0);",
|
||||||
|
" ",
|
||||||
|
" if (charCode < 0xFF) {",
|
||||||
|
" var escapeChar = 'x';",
|
||||||
|
" var length = 2;",
|
||||||
|
" } else {",
|
||||||
|
" var escapeChar = 'u';",
|
||||||
|
" var length = 4;",
|
||||||
|
" }",
|
||||||
|
" ",
|
||||||
|
" return '\\\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);",
|
||||||
|
" }",
|
||||||
|
" ",
|
||||||
/* This needs to be in sync with |quote| in utils.js. */
|
/* This needs to be in sync with |quote| in utils.js. */
|
||||||
" function quote(s) {",
|
" function quote(s) {",
|
||||||
" /*",
|
" /*",
|
||||||
|
@ -128,12 +155,11 @@ PEG.compiler.emitter = function(ast) {
|
||||||
" * Any character may appear in the form of an escape sequence.",
|
" * Any character may appear in the form of an escape sequence.",
|
||||||
" */",
|
" */",
|
||||||
" return '\"' + s",
|
" return '\"' + s",
|
||||||
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
|
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
|
||||||
" .replace(/\"/g, '\\\\\"') // closing quote character",
|
" .replace(/\"/g, '\\\\\"') // closing quote character",
|
||||||
" .replace(/\\r/g, '\\\\r') // carriage return",
|
" .replace(/\\r/g, '\\\\r') // carriage return",
|
||||||
" .replace(/\\u2028/g, '\\\\u2028') // line separator",
|
" .replace(/\\n/g, '\\\\n') // line feed",
|
||||||
" .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
|
" .replace(/[\\x80-\\uFFFF]/g, escape) // non-ASCII characters",
|
||||||
" .replace(/\\n/g, '\\\\n') // line feed",
|
|
||||||
" + '\"';",
|
" + '\"';",
|
||||||
" }",
|
" }",
|
||||||
" ",
|
" ",
|
||||||
|
|
|
@ -15,6 +15,31 @@ PEG.parser = (function(){
|
||||||
var rightmostMatchFailuresExpected = [];
|
var rightmostMatchFailuresExpected = [];
|
||||||
var cache = {};
|
var cache = {};
|
||||||
|
|
||||||
|
function padLeft(input, padding, length) {
|
||||||
|
var result = input;
|
||||||
|
|
||||||
|
var padLength = length - input.length;
|
||||||
|
for (var i = 0; i < padLength; i++) {
|
||||||
|
result = padding + result;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
function escape(ch) {
|
||||||
|
var charCode = ch.charCodeAt(0);
|
||||||
|
|
||||||
|
if (charCode < 0xFF) {
|
||||||
|
var escapeChar = 'x';
|
||||||
|
var length = 2;
|
||||||
|
} else {
|
||||||
|
var escapeChar = 'u';
|
||||||
|
var length = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
|
||||||
|
}
|
||||||
|
|
||||||
function quote(s) {
|
function quote(s) {
|
||||||
/*
|
/*
|
||||||
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
|
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
|
||||||
|
@ -23,12 +48,11 @@ PEG.parser = (function(){
|
||||||
* Any character may appear in the form of an escape sequence.
|
* Any character may appear in the form of an escape sequence.
|
||||||
*/
|
*/
|
||||||
return '"' + s
|
return '"' + s
|
||||||
.replace(/\\/g, '\\\\') // backslash
|
.replace(/\\/g, '\\\\') // backslash
|
||||||
.replace(/"/g, '\\"') // closing quote character
|
.replace(/"/g, '\\"') // closing quote character
|
||||||
.replace(/\r/g, '\\r') // carriage return
|
.replace(/\r/g, '\\r') // carriage return
|
||||||
.replace(/\u2028/g, '\\u2028') // line separator
|
.replace(/\n/g, '\\n') // line feed
|
||||||
.replace(/\u2029/g, '\\u2029') // paragraph separator
|
.replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
|
||||||
.replace(/\n/g, '\\n') // line feed
|
|
||||||
+ '"';
|
+ '"';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3404,13 +3428,13 @@ PEG.parser = (function(){
|
||||||
|
|
||||||
var savedReportMatchFailures = reportMatchFailures;
|
var savedReportMatchFailures = reportMatchFailures;
|
||||||
reportMatchFailures = false;
|
reportMatchFailures = false;
|
||||||
if (input.substr(pos).match(/^[ - ]/) !== null) {
|
if (input.substr(pos).match(/^[ \xA0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]/) !== null) {
|
||||||
var result0 = input.charAt(pos);
|
var result0 = input.charAt(pos);
|
||||||
pos++;
|
pos++;
|
||||||
} else {
|
} else {
|
||||||
var result0 = null;
|
var result0 = null;
|
||||||
if (reportMatchFailures) {
|
if (reportMatchFailures) {
|
||||||
matchFailed("[ - ]");
|
matchFailed("[ \\xA0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
reportMatchFailures = savedReportMatchFailures;
|
reportMatchFailures = savedReportMatchFailures;
|
||||||
|
|
74
src/utils.js
74
src/utils.js
|
@ -34,6 +34,44 @@ function map(array, callback) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns a string padded on the left to a desired length with a character.
|
||||||
|
*
|
||||||
|
* The code needs to be in sync with the code template in the compilation
|
||||||
|
* function for "action" nodes.
|
||||||
|
*/
|
||||||
|
function padLeft(input, padding, length) {
|
||||||
|
var result = input;
|
||||||
|
|
||||||
|
var padLength = length - input.length;
|
||||||
|
for (var i = 0; i < padLength; i++) {
|
||||||
|
result = padding + result;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns an escape sequence for given character. Uses \x for characters <=
|
||||||
|
* 0xFF to save space, \u for the rest.
|
||||||
|
*
|
||||||
|
* The code needs to be in sync with the code template in the compilation
|
||||||
|
* function for "action" nodes.
|
||||||
|
*/
|
||||||
|
function escape(ch) {
|
||||||
|
var charCode = ch.charCodeAt(0);
|
||||||
|
|
||||||
|
if (charCode < 0xFF) {
|
||||||
|
var escapeChar = 'x';
|
||||||
|
var length = 2;
|
||||||
|
} else {
|
||||||
|
var escapeChar = 'u';
|
||||||
|
var length = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Surrounds the string with quotes and escapes characters inside so that the
|
* Surrounds the string with quotes and escapes characters inside so that the
|
||||||
* result is a valid JavaScript string.
|
* result is a valid JavaScript string.
|
||||||
|
@ -47,14 +85,15 @@ function quote(s) {
|
||||||
* literal except for the closing quote character, backslash, carriage return,
|
* literal except for the closing quote character, backslash, carriage return,
|
||||||
* line separator, paragraph separator, and line feed. Any character may
|
* line separator, paragraph separator, and line feed. Any character may
|
||||||
* appear in the form of an escape sequence.
|
* appear in the form of an escape sequence.
|
||||||
|
*
|
||||||
|
* For portability, we also escape all non-ASCII characters.
|
||||||
*/
|
*/
|
||||||
return '"' + s
|
return '"' + s
|
||||||
.replace(/\\/g, '\\\\') // backslash
|
.replace(/\\/g, '\\\\') // backslash
|
||||||
.replace(/"/g, '\\"') // closing quote character
|
.replace(/"/g, '\\"') // closing quote character
|
||||||
.replace(/\r/g, '\\r') // carriage return
|
.replace(/\r/g, '\\r') // carriage return
|
||||||
.replace(/\u2028/g, '\\u2028') // line separator
|
.replace(/\n/g, '\\n') // line feed
|
||||||
.replace(/\u2029/g, '\\u2029') // paragraph separator
|
.replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
|
||||||
.replace(/\n/g, '\\n') // line feed
|
|
||||||
+ '"';
|
+ '"';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -63,17 +102,20 @@ function quote(s) {
|
||||||
* characters in a character class of a regular expression.
|
* characters in a character class of a regular expression.
|
||||||
*/
|
*/
|
||||||
function quoteForRegexpClass(s) {
|
function quoteForRegexpClass(s) {
|
||||||
/* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
|
/*
|
||||||
|
* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
|
||||||
|
*
|
||||||
|
* For portability, we also escape all non-ASCII characters.
|
||||||
|
*/
|
||||||
return s
|
return s
|
||||||
.replace(/\\/g, '\\\\') // backslash
|
.replace(/\\/g, '\\\\') // backslash
|
||||||
.replace(/\0/g, '\\0') // null, IE needs this
|
.replace(/\0/g, '\\0') // null, IE needs this
|
||||||
.replace(/\//g, '\\/') // closing slash
|
.replace(/\//g, '\\/') // closing slash
|
||||||
.replace(/]/g, '\\]') // closing bracket
|
.replace(/]/g, '\\]') // closing bracket
|
||||||
.replace(/-/g, '\\-') // dash
|
.replace(/-/g, '\\-') // dash
|
||||||
.replace(/\r/g, '\\r') // carriage return
|
.replace(/\r/g, '\\r') // carriage return
|
||||||
.replace(/\u2028/g, '\\u2028') // line separator
|
.replace(/\n/g, '\\n') // line feed
|
||||||
.replace(/\u2029/g, '\\u2029') // paragraph separator
|
.replace(/[\x80-\uFFFF]/g, escape) // non-ASCII characters
|
||||||
.replace(/\n/g, '\\n') // line feed
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -435,7 +435,7 @@ test("parses bracketDelimitedCharacter", function() {
|
||||||
parserParses("start = [\\n]", classGrammar(false, ["\n"], "[\\n]"));
|
parserParses("start = [\\n]", classGrammar(false, ["\n"], "[\\n]"));
|
||||||
parserParses("start = [\\0]", classGrammar(false, ["\0"], "[\\0]"));
|
parserParses("start = [\\0]", classGrammar(false, ["\0"], "[\\0]"));
|
||||||
parserParses("start = [\\x00]", classGrammar(false, ["\0"], "[\\0]"));
|
parserParses("start = [\\x00]", classGrammar(false, ["\0"], "[\\0]"));
|
||||||
parserParses("start = [\\u0120]", classGrammar(false, ["\u0120"], "[\u0120]"));
|
parserParses("start = [\\u0120]", classGrammar(false, ["\u0120"], "[\\u0120]"));
|
||||||
parserParses("start = [\\\n]", classGrammar(false, ["\n"], "[\\n]"));
|
parserParses("start = [\\\n]", classGrammar(false, ["\n"], "[\\n]"));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue