diff --git a/lib/parser.js b/lib/parser.js index 5d5c861..0f7ed5f 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -240,43 +240,23 @@ module.exports = (function() { peg$c93 = "]", peg$c94 = { type: "literal", value: "]", description: "\"]\"" }, peg$c95 = function(inverted, parts, flags) { - var partsConverted = utils.map(parts, function(part) { return part.data; }); - var rawText = "[" - + (inverted !== null ? inverted : "") - + utils.map(parts, function(part) { return part.rawText; }).join("") - + "]" - + (flags !== null ? flags : ""); - - return { - type: "class", - parts: partsConverted, - // FIXME: Get the raw text from the input directly. - rawText: rawText, - inverted: inverted === "^", - ignoreCase: flags === "i" - }; - }, - peg$c96 = "-", - peg$c97 = { type: "literal", value: "-", description: "\"-\"" }, - peg$c98 = function(begin, end) { - if (begin.data.charCodeAt(0) > end.data.charCodeAt(0)) { - error( - "Invalid character range: " + begin.rawText + "-" + end.rawText + "." - ); + return { + type: "class", + parts: parts, + rawText: text().replace(/\s+$/, ""), + inverted: inverted === "^", + ignoreCase: flags === "i" + }; + }, + peg$c96 = function(class_) { return class_; }, + peg$c97 = "-", + peg$c98 = { type: "literal", value: "-", description: "\"-\"" }, + peg$c99 = function(begin, end) { + if (begin.charCodeAt(0) > end.charCodeAt(0)) { + error("Invalid character range: " + text() + "."); } - return { - data: [begin.data, end.data], - // FIXME: Get the raw text from the input directly. - rawText: begin.rawText + "-" + end.rawText - }; - }, - peg$c99 = function(char_) { - return { - data: char_, - // FIXME: Get the raw text from the input directly. - rawText: utils.quoteForRegexpClass(char_) - }; + return [begin, end]; }, peg$c100 = "x", peg$c101 = { type: "literal", value: "x", description: "\"x\"" }, @@ -332,7 +312,7 @@ module.exports = (function() { peg$c141 = { type: "class", value: "[\\n\\r\\u2028\\u2029]", description: "[\\n\\r\\u2028\\u2029]" }, peg$c142 = { type: "other", description: "whitespace" }, peg$c143 = /^[ \t\x0B\f\xA0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]/, - peg$c144 = { type: "class", value: "[ \\t\\x0B\\f\\xA0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]", description: "[ \\t\\x0B\\f\\xA0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]" }, + peg$c144 = { type: "class", value: "[ \\t\\v\\f\\u00A0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]", description: "[ \\t\\v\\f\\u00A0\\uFEFF\\u1680\\u180E\\u2000-\\u200A\\u202F\\u205F\\u3000]" }, peg$currPos = 0, peg$reportedPos = 0, @@ -1913,78 +1893,87 @@ module.exports = (function() { peg$silentFails++; s0 = peg$currPos; + s1 = peg$currPos; if (input.charCodeAt(peg$currPos) === 91) { - s1 = peg$c89; + s2 = peg$c89; peg$currPos++; } else { - s1 = peg$FAILED; + s2 = peg$FAILED; if (peg$silentFails === 0) { peg$fail(peg$c90); } } - if (s1 !== peg$FAILED) { + if (s2 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 94) { - s2 = peg$c91; + s3 = peg$c91; peg$currPos++; } else { - s2 = peg$FAILED; + s3 = peg$FAILED; if (peg$silentFails === 0) { peg$fail(peg$c92); } } - if (s2 === peg$FAILED) { - s2 = peg$c1; + if (s3 === peg$FAILED) { + s3 = peg$c1; } - if (s2 !== peg$FAILED) { - s3 = []; - s4 = peg$parseclassCharacterRange(); - if (s4 === peg$FAILED) { - s4 = peg$parseclassCharacter(); + if (s3 !== peg$FAILED) { + s4 = []; + s5 = peg$parseclassCharacterRange(); + if (s5 === peg$FAILED) { + s5 = peg$parsebracketDelimitedCharacter(); } - while (s4 !== peg$FAILED) { - s3.push(s4); - s4 = peg$parseclassCharacterRange(); - if (s4 === peg$FAILED) { - s4 = peg$parseclassCharacter(); + while (s5 !== peg$FAILED) { + s4.push(s5); + s5 = peg$parseclassCharacterRange(); + if (s5 === peg$FAILED) { + s5 = peg$parsebracketDelimitedCharacter(); } } - if (s3 !== peg$FAILED) { + if (s4 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 93) { - s4 = peg$c93; + s5 = peg$c93; peg$currPos++; } else { - s4 = peg$FAILED; + s5 = peg$FAILED; if (peg$silentFails === 0) { peg$fail(peg$c94); } } - if (s4 !== peg$FAILED) { + if (s5 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 105) { - s5 = peg$c74; + s6 = peg$c74; peg$currPos++; } else { - s5 = peg$FAILED; + s6 = peg$FAILED; if (peg$silentFails === 0) { peg$fail(peg$c75); } } - if (s5 === peg$FAILED) { - s5 = peg$c1; + if (s6 === peg$FAILED) { + s6 = peg$c1; } - if (s5 !== peg$FAILED) { - s6 = peg$parse__(); - if (s6 !== peg$FAILED) { - peg$reportedPos = s0; - s1 = peg$c95(s2, s3, s5); - s0 = s1; - } else { - peg$currPos = s0; - s0 = peg$c0; - } + if (s6 !== peg$FAILED) { + peg$reportedPos = s1; + s2 = peg$c95(s3, s4, s6); + s1 = s2; } else { - peg$currPos = s0; - s0 = peg$c0; + peg$currPos = s1; + s1 = peg$c0; } } else { - peg$currPos = s0; - s0 = peg$c0; + peg$currPos = s1; + s1 = peg$c0; } } else { - peg$currPos = s0; - s0 = peg$c0; + peg$currPos = s1; + s1 = peg$c0; } + } else { + peg$currPos = s1; + s1 = peg$c0; + } + } else { + peg$currPos = s1; + s1 = peg$c0; + } + if (s1 !== peg$FAILED) { + s2 = peg$parse__(); + if (s2 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c96(s1); + s0 = s1; } else { peg$currPos = s0; s0 = peg$c0; @@ -2006,20 +1995,20 @@ module.exports = (function() { var s0, s1, s2, s3; s0 = peg$currPos; - s1 = peg$parseclassCharacter(); + s1 = peg$parsebracketDelimitedCharacter(); if (s1 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 45) { - s2 = peg$c96; + s2 = peg$c97; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c97); } + if (peg$silentFails === 0) { peg$fail(peg$c98); } } if (s2 !== peg$FAILED) { - s3 = peg$parseclassCharacter(); + s3 = peg$parsebracketDelimitedCharacter(); if (s3 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c98(s1, s3); + s1 = peg$c99(s1, s3); s0 = s1; } else { peg$currPos = s0; @@ -2037,20 +2026,6 @@ module.exports = (function() { return s0; } - function peg$parseclassCharacter() { - var s0, s1; - - s0 = peg$currPos; - s1 = peg$parsebracketDelimitedCharacter(); - if (s1 !== peg$FAILED) { - peg$reportedPos = s0; - s1 = peg$c99(s1); - } - s0 = s1; - - return s0; - } - function peg$parsebracketDelimitedCharacter() { var s0; diff --git a/spec/parser.spec.js b/spec/parser.spec.js index bb5235b..e6d4193 100644 --- a/spec/parser.spec.js +++ b/spec/parser.spec.js @@ -446,10 +446,6 @@ describe("PEG.js grammar parser", function() { classGrammar([["a", "d"]], "[a-d]i", false, true) ); - expect('start = [\u0080\u0081\u0082]').toParseAs( - classGrammar(["\u0080", "\u0081", "\u0082"], "[\\x80\\x81\\x82]") - ); - expect('start = [a-d]\n').toParseAs(classGrammar([["a", "d"]], "[a-d]")); }); @@ -461,17 +457,11 @@ describe("PEG.js grammar parser", function() { expect('start = [b-a]').toFailToParse({ message: "Invalid character range: b-a." }); - expect('start = [\u0081-\u0080]').toFailToParse({ - message: "Invalid character range: \\x81-\\x80." - }); }); /* Canonical classCharacter is "a". */ it("parses classCharacter", function() { expect('start = [a]').toParseAs(classGrammar(["a"], "[a]")); - - /* This test demonstrates that |rawText| is not really "raw". */ - expect('start = [\u0080]').toParseAs(classGrammar(["\x80"], "[\\x80]")); }); /* Canonical bracketDelimitedCharacter is "a". */ @@ -481,7 +471,7 @@ describe("PEG.js grammar parser", function() { expect('start = [\\0]' ).toParseAs(classGrammar(["\x00"], "[\\0]")); expect('start = [\\xFF]' ).toParseAs(classGrammar(["\xFF"], "[\\xFF]")); expect('start = [\\uFFFF]').toParseAs(classGrammar(["\uFFFF"], "[\\uFFFF]")); - expect('start = [\\\n]' ).toParseAs(classGrammar(["\n"], "[\\n]")); + expect('start = [\\\n]' ).toParseAs(classGrammar(["\n"], "[\\\n]")); }); /* Canonical simpleBracketDelimiedCharacter is "a". */ diff --git a/src/parser.pegjs b/src/parser.pegjs index 199935a..58927d0 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -246,47 +246,31 @@ simpleSingleQuotedCharacter = !("'" / "\\" / eolChar) char_:. { return char_; } class "character class" - = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? __ { - var partsConverted = utils.map(parts, function(part) { return part.data; }); - var rawText = "[" - + (inverted !== null ? inverted : "") - + utils.map(parts, function(part) { return part.rawText; }).join("") - + "]" - + (flags !== null ? flags : ""); - - return { - type: "class", - parts: partsConverted, - // FIXME: Get the raw text from the input directly. - rawText: rawText, - inverted: inverted === "^", - ignoreCase: flags === "i" - }; - } + = class_:( + "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? { + return { + type: "class", + parts: parts, + rawText: text().replace(/\s+$/, ""), + inverted: inverted === "^", + ignoreCase: flags === "i" + }; + } + ) + __ + { return class_; } classCharacterRange = begin:classCharacter "-" end:classCharacter { - if (begin.data.charCodeAt(0) > end.data.charCodeAt(0)) { - error( - "Invalid character range: " + begin.rawText + "-" + end.rawText + "." - ); + if (begin.charCodeAt(0) > end.charCodeAt(0)) { + error("Invalid character range: " + text() + "."); } - return { - data: [begin.data, end.data], - // FIXME: Get the raw text from the input directly. - rawText: begin.rawText + "-" + end.rawText - }; + return [begin, end]; } classCharacter - = char_:bracketDelimitedCharacter { - return { - data: char_, - // FIXME: Get the raw text from the input directly. - rawText: utils.quoteForRegexpClass(char_) - }; - } + = bracketDelimitedCharacter bracketDelimitedCharacter = simpleBracketDelimitedCharacter