PEG.js grammar: Fix how |rawText| is created

Before this commit, the value of the |rawText| property of "class" AST
nodes was built in a hackish way from the processed input, and it didn't
always exactly match the actual input text.

This commit changes the code so that the value of the |rawText| property
is created using the |text| function. This is a cleaner approach that also
fixes the inexact representation problem.
redux
David Majda 10 years ago committed by David Majda
parent df154daafb
commit 3dbec0b30d

@ -240,43 +240,23 @@ module.exports = (function() {
peg$c93 = "]",
peg$c94 = { type: "literal", value: "]", description: "\"]\"" },
peg$c95 = function(inverted, parts, flags) {
var partsConverted = utils.map(parts, function(part) { return part.data; });
var rawText = "["
+ (inverted !== null ? inverted : "")
+ utils.map(parts, function(part) { return part.rawText; }).join("")
+ "]"
+ (flags !== null ? flags : "");
return {
type: "class",
parts: partsConverted,
// FIXME: Get the raw text from the input directly.
rawText: rawText,
inverted: inverted === "^",
ignoreCase: flags === "i"
};
},
peg$c96 = "-",
peg$c97 = { type: "literal", value: "-", description: "\"-\"" },
peg$c98 = function(begin, end) {
if (begin.data.charCodeAt(0) > end.data.charCodeAt(0)) {
error(
"Invalid character range: " + begin.rawText + "-" + end.rawText + "."
);
return {
type: "class",
parts: parts,
rawText: text().replace(/\s+$/, ""),
inverted: inverted === "^",
ignoreCase: flags === "i"
};
},
peg$c96 = function(class_) { return class_; },
peg$c97 = "-",
peg$c98 = { type: "literal", value: "-", description: "\"-\"" },
peg$c99 = function(begin, end) {
if (begin.charCodeAt(0) > end.charCodeAt(0)) {
error("Invalid character range: " + text() + ".");
}
return {
data: [begin.data, end.data],
// FIXME: Get the raw text from the input directly.
rawText: begin.rawText + "-" + end.rawText
};
},
peg$c99 = function(char_) {
return {
data: char_,
// FIXME: Get the raw text from the input directly.
rawText: utils.quoteForRegexpClass(char_)
};
return [begin, end];
},
peg$c100 = "x",
peg$c101 = { type: "literal", value: "x", description: "\"x\"" },
@ -332,7 +312,7 @@ module.exports = (function() {
peg$c141 = { type: "class", value: "[\\n\\r\\u2028\\u2029]", description: "[\\n\\r\\u2028\\u2029]" },
peg$c142 = { type: "other", description: "whitespace" },
peg$c143 = /^[ \t\x0B\f\xA0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]/,
peg$c144 = { type: "class", value: "[ \\t\\x0B\\f\\xA0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]", description: "[ \\t\\x0B\\f\\xA0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]" },
peg$c144 = { type: "class", value: "[ \\t\\v\\f\\u00A0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]", description: "[ \\t\\v\\f\\u00A0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]" },
peg$currPos = 0,
peg$reportedPos = 0,
@ -1913,78 +1893,87 @@ module.exports = (function() {
peg$silentFails++;
s0 = peg$currPos;
s1 = peg$currPos;
if (input.charCodeAt(peg$currPos) === 91) {
s1 = peg$c89;
s2 = peg$c89;
peg$currPos++;
} else {
s1 = peg$FAILED;
s2 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$c90); }
}
if (s1 !== peg$FAILED) {
if (s2 !== peg$FAILED) {
if (input.charCodeAt(peg$currPos) === 94) {
s2 = peg$c91;
s3 = peg$c91;
peg$currPos++;
} else {
s2 = peg$FAILED;
s3 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$c92); }
}
if (s2 === peg$FAILED) {
s2 = peg$c1;
if (s3 === peg$FAILED) {
s3 = peg$c1;
}
if (s2 !== peg$FAILED) {
s3 = [];
s4 = peg$parseclassCharacterRange();
if (s4 === peg$FAILED) {
s4 = peg$parseclassCharacter();
if (s3 !== peg$FAILED) {
s4 = [];
s5 = peg$parseclassCharacterRange();
if (s5 === peg$FAILED) {
s5 = peg$parsebracketDelimitedCharacter();
}
while (s4 !== peg$FAILED) {
s3.push(s4);
s4 = peg$parseclassCharacterRange();
if (s4 === peg$FAILED) {
s4 = peg$parseclassCharacter();
while (s5 !== peg$FAILED) {
s4.push(s5);
s5 = peg$parseclassCharacterRange();
if (s5 === peg$FAILED) {
s5 = peg$parsebracketDelimitedCharacter();
}
}
if (s3 !== peg$FAILED) {
if (s4 !== peg$FAILED) {
if (input.charCodeAt(peg$currPos) === 93) {
s4 = peg$c93;
s5 = peg$c93;
peg$currPos++;
} else {
s4 = peg$FAILED;
s5 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$c94); }
}
if (s4 !== peg$FAILED) {
if (s5 !== peg$FAILED) {
if (input.charCodeAt(peg$currPos) === 105) {
s5 = peg$c74;
s6 = peg$c74;
peg$currPos++;
} else {
s5 = peg$FAILED;
s6 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$c75); }
}
if (s5 === peg$FAILED) {
s5 = peg$c1;
if (s6 === peg$FAILED) {
s6 = peg$c1;
}
if (s5 !== peg$FAILED) {
s6 = peg$parse__();
if (s6 !== peg$FAILED) {
peg$reportedPos = s0;
s1 = peg$c95(s2, s3, s5);
s0 = s1;
} else {
peg$currPos = s0;
s0 = peg$c0;
}
if (s6 !== peg$FAILED) {
peg$reportedPos = s1;
s2 = peg$c95(s3, s4, s6);
s1 = s2;
} else {
peg$currPos = s0;
s0 = peg$c0;
peg$currPos = s1;
s1 = peg$c0;
}
} else {
peg$currPos = s0;
s0 = peg$c0;
peg$currPos = s1;
s1 = peg$c0;
}
} else {
peg$currPos = s0;
s0 = peg$c0;
peg$currPos = s1;
s1 = peg$c0;
}
} else {
peg$currPos = s1;
s1 = peg$c0;
}
} else {
peg$currPos = s1;
s1 = peg$c0;
}
if (s1 !== peg$FAILED) {
s2 = peg$parse__();
if (s2 !== peg$FAILED) {
peg$reportedPos = s0;
s1 = peg$c96(s1);
s0 = s1;
} else {
peg$currPos = s0;
s0 = peg$c0;
@ -2006,20 +1995,20 @@ module.exports = (function() {
var s0, s1, s2, s3;
s0 = peg$currPos;
s1 = peg$parseclassCharacter();
s1 = peg$parsebracketDelimitedCharacter();
if (s1 !== peg$FAILED) {
if (input.charCodeAt(peg$currPos) === 45) {
s2 = peg$c96;
s2 = peg$c97;
peg$currPos++;
} else {
s2 = peg$FAILED;
if (peg$silentFails === 0) { peg$fail(peg$c97); }
if (peg$silentFails === 0) { peg$fail(peg$c98); }
}
if (s2 !== peg$FAILED) {
s3 = peg$parseclassCharacter();
s3 = peg$parsebracketDelimitedCharacter();
if (s3 !== peg$FAILED) {
peg$reportedPos = s0;
s1 = peg$c98(s1, s3);
s1 = peg$c99(s1, s3);
s0 = s1;
} else {
peg$currPos = s0;
@ -2037,20 +2026,6 @@ module.exports = (function() {
return s0;
}
function peg$parseclassCharacter() {
var s0, s1;
s0 = peg$currPos;
s1 = peg$parsebracketDelimitedCharacter();
if (s1 !== peg$FAILED) {
peg$reportedPos = s0;
s1 = peg$c99(s1);
}
s0 = s1;
return s0;
}
function peg$parsebracketDelimitedCharacter() {
var s0;

@ -446,10 +446,6 @@ describe("PEG.js grammar parser", function() {
classGrammar([["a", "d"]], "[a-d]i", false, true)
);
expect('start = [\u0080\u0081\u0082]').toParseAs(
classGrammar(["\u0080", "\u0081", "\u0082"], "[\\x80\\x81\\x82]")
);
expect('start = [a-d]\n').toParseAs(classGrammar([["a", "d"]], "[a-d]"));
});
@ -461,17 +457,11 @@ describe("PEG.js grammar parser", function() {
expect('start = [b-a]').toFailToParse({
message: "Invalid character range: b-a."
});
expect('start = [\u0081-\u0080]').toFailToParse({
message: "Invalid character range: \\x81-\\x80."
});
});
/* Canonical classCharacter is "a". */
it("parses classCharacter", function() {
expect('start = [a]').toParseAs(classGrammar(["a"], "[a]"));
/* This test demonstrates that |rawText| is not really "raw". */
expect('start = [\u0080]').toParseAs(classGrammar(["\x80"], "[\\x80]"));
});
/* Canonical bracketDelimitedCharacter is "a". */
@ -481,7 +471,7 @@ describe("PEG.js grammar parser", function() {
expect('start = [\\0]' ).toParseAs(classGrammar(["\x00"], "[\\0]"));
expect('start = [\\xFF]' ).toParseAs(classGrammar(["\xFF"], "[\\xFF]"));
expect('start = [\\uFFFF]').toParseAs(classGrammar(["\uFFFF"], "[\\uFFFF]"));
expect('start = [\\\n]' ).toParseAs(classGrammar(["\n"], "[\\n]"));
expect('start = [\\\n]' ).toParseAs(classGrammar(["\n"], "[\\\n]"));
});
/* Canonical simpleBracketDelimitedCharacter is "a". */

@ -246,47 +246,31 @@ simpleSingleQuotedCharacter
= !("'" / "\\" / eolChar) char_:. { return char_; }
class "character class"
= "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? __ {
var partsConverted = utils.map(parts, function(part) { return part.data; });
var rawText = "["
+ (inverted !== null ? inverted : "")
+ utils.map(parts, function(part) { return part.rawText; }).join("")
+ "]"
+ (flags !== null ? flags : "");
return {
type: "class",
parts: partsConverted,
// FIXME: Get the raw text from the input directly.
rawText: rawText,
inverted: inverted === "^",
ignoreCase: flags === "i"
};
}
= class_:(
"[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? {
return {
type: "class",
parts: parts,
rawText: text().replace(/\s+$/, ""),
inverted: inverted === "^",
ignoreCase: flags === "i"
};
}
)
__
{ return class_; }
classCharacterRange
= begin:classCharacter "-" end:classCharacter {
if (begin.data.charCodeAt(0) > end.data.charCodeAt(0)) {
error(
"Invalid character range: " + begin.rawText + "-" + end.rawText + "."
);
if (begin.charCodeAt(0) > end.charCodeAt(0)) {
error("Invalid character range: " + text() + ".");
}
return {
data: [begin.data, end.data],
// FIXME: Get the raw text from the input directly.
rawText: begin.rawText + "-" + end.rawText
};
return [begin, end];
}
classCharacter
= char_:bracketDelimitedCharacter {
return {
data: char_,
// FIXME: Get the raw text from the input directly.
rawText: utils.quoteForRegexpClass(char_)
};
}
= bracketDelimitedCharacter
bracketDelimitedCharacter
= simpleBracketDelimitedCharacter

Loading…
Cancel
Save