From 0d6b91cb20b750de8724a23920ad48c27530d80b Mon Sep 17 00:00:00 2001 From: David Majda Date: Fri, 28 Mar 2014 15:18:16 +0100 Subject: [PATCH] PEG.js grammar: More JavaScript-like rules for strings/literals/classes --- lib/parser.js | 917 +++++++++++++++++++++++++------------------- spec/parser.spec.js | 159 ++++---- src/parser.pegjs | 165 ++++---- 3 files changed, 658 insertions(+), 583 deletions(-) diff --git a/lib/parser.js b/lib/parser.js index b178aef..7e456ad 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -216,85 +216,88 @@ module.exports = (function() { peg$c90 = { type: "other", description: "identifier" }, peg$c91 = "_", peg$c92 = { type: "literal", value: "_", description: "\"_\"" }, - peg$c93 = { type: "other", description: "literal" }, - peg$c94 = "i", - peg$c95 = { type: "literal", value: "i", description: "\"i\"" }, - peg$c96 = function(value, flags) { - return { - type: "literal", - value: value, - ignoreCase: flags === "i" - }; + peg$c93 = /^[a-z]/, + peg$c94 = { type: "class", value: "[a-z]", description: "[a-z]" }, + peg$c95 = /^[A-Z]/, + peg$c96 = { type: "class", value: "[A-Z]", description: "[A-Z]" }, + peg$c97 = { type: "other", description: "literal" }, + peg$c98 = "i", + peg$c99 = { type: "literal", value: "i", description: "\"i\"" }, + peg$c100 = function(value, ignoreCase) { + return { type: "literal", value: value, ignoreCase: ignoreCase !== null }; }, - peg$c97 = { type: "other", description: "string" }, - peg$c98 = function(string) { return string; }, - peg$c99 = "\"", - peg$c100 = { type: "literal", value: "\"", description: "\"\\\"\"" }, - peg$c101 = function(chars) { return chars.join(""); }, - peg$c102 = "\\", - peg$c103 = { type: "literal", value: "\\", description: "\"\\\\\"" }, - peg$c104 = function(char_) { return char_; }, + peg$c101 = { type: "other", description: "string" }, + peg$c102 = "\"", + peg$c103 = { type: "literal", value: "\"", description: "\"\\\"\"" }, + peg$c104 = function(chars) { return chars.join(""); }, peg$c105 = "'", peg$c106 = { type: "literal", value: "'", description: "\"'\"" }, - peg$c107 = { type: "other", description: "character class" }, - peg$c108 = "[", - peg$c109 = { type: "literal", value: "[", description: "\"[\"" }, - peg$c110 = "^", - peg$c111 = { type: "literal", value: "^", description: "\"^\"" }, - peg$c112 = "]", - peg$c113 = { type: "literal", value: "]", description: "\"]\"" }, - peg$c114 = function(inverted, parts, flags) { + peg$c107 = "\\", + peg$c108 = { type: "literal", value: "\\", description: "\"\\\\\"" }, + peg$c109 = function() { return text(); }, + peg$c110 = function(sequence) { return sequence; }, + peg$c111 = { type: "other", description: "character class" }, + peg$c112 = "[", + peg$c113 = { type: "literal", value: "[", description: "\"[\"" }, + peg$c114 = "^", + peg$c115 = { type: "literal", value: "^", description: "\"^\"" }, + peg$c116 = "]", + peg$c117 = { type: "literal", value: "]", description: "\"]\"" }, + peg$c118 = function(inverted, parts, ignoreCase) { return { type: "class", parts: parts, - rawText: text().replace(/\s+$/, ""), - inverted: inverted === "^", - ignoreCase: flags === "i" + inverted: inverted !== null, + ignoreCase: ignoreCase !== null, + rawText: text() }; }, - peg$c115 = "-", - peg$c116 = { type: "literal", value: "-", description: "\"-\"" }, - peg$c117 = function(begin, end) { + peg$c119 = "-", + peg$c120 = { type: "literal", value: "-", description: "\"-\"" }, + peg$c121 = function(begin, end) { if (begin.charCodeAt(0) > end.charCodeAt(0)) { - error("Invalid character range: " + text() + "."); + error( + "Invalid character range: " + text() + "." + ); } return [begin, end]; }, - peg$c118 = "x", - peg$c119 = { type: "literal", value: "x", description: "\"x\"" }, - peg$c120 = "u", - peg$c121 = { type: "literal", value: "u", description: "\"u\"" }, - peg$c122 = function(char_) { - return char_ - .replace("b", "\b") - .replace("f", "\f") - .replace("n", "\n") - .replace("r", "\r") - .replace("t", "\t") - .replace("v", "\x0B"); // IE does not recognize "\v". - }, - peg$c123 = "\\0", - peg$c124 = { type: "literal", value: "\\0", description: "\"\\\\0\"" }, - peg$c125 = function() { return "\x00"; }, - peg$c126 = "\\x", - peg$c127 = { type: "literal", value: "\\x", description: "\"\\\\x\"" }, - peg$c128 = function(digits) { + peg$c122 = function() { return ""; }, + peg$c123 = "0", + peg$c124 = { type: "literal", value: "0", description: "\"0\"" }, + peg$c125 = function() { return "\0"; }, + peg$c126 = "b", + peg$c127 = { type: "literal", value: "b", description: "\"b\"" }, + peg$c128 = function() { return "\b"; }, + peg$c129 = "f", + peg$c130 = { type: "literal", value: "f", description: "\"f\"" }, + peg$c131 = function() { return "\f"; }, + peg$c132 = "n", + peg$c133 = { type: "literal", value: "n", description: "\"n\"" }, + peg$c134 = function() { return "\n"; }, + peg$c135 = "r", + peg$c136 = { type: "literal", value: "r", description: "\"r\"" }, + peg$c137 = function() { return "\r"; }, + peg$c138 = "t", + peg$c139 = { type: "literal", value: "t", description: "\"t\"" }, + peg$c140 = function() { return "\t"; }, + peg$c141 = "v", + peg$c142 = { type: "literal", value: "v", description: "\"v\"" }, + peg$c143 = function() { return "\x0B"; }, + peg$c144 = "x", + peg$c145 = { type: "literal", value: "x", description: "\"x\"" }, + peg$c146 = "u", + peg$c147 = { type: "literal", value: "u", description: "\"u\"" }, + peg$c148 = function(digits) { return String.fromCharCode(parseInt(digits, 16)); }, - peg$c129 = "\\u", - peg$c130 = { type: "literal", value: "\\u", description: "\"\\\\u\"" }, - peg$c131 = function(eol) { return ""; }, - peg$c132 = /^[0-9]/, - peg$c133 = { type: "class", value: "[0-9]", description: "[0-9]" }, - peg$c134 = /^[0-9a-fA-F]/, - peg$c135 = { type: "class", value: "[0-9a-fA-F]", description: "[0-9a-fA-F]" }, - peg$c136 = /^[a-z]/, - peg$c137 = { type: "class", value: "[a-z]", description: "[a-z]" }, - peg$c138 = /^[A-Z]/, - peg$c139 = { type: "class", value: "[A-Z]", description: "[A-Z]" }, - peg$c140 = /^[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000]/, - peg$c141 = { type: "class", value: "[\\u0020\\u00A0\\u1680\\u2000-\\u200A\\u202F\\u205F\\u3000]", description: "[\\u0020\\u00A0\\u1680\\u2000-\\u200A\\u202F\\u205F\\u3000]" }, + peg$c149 = /^[0-9]/, + peg$c150 = { type: "class", value: "[0-9]", description: "[0-9]" }, + peg$c151 = /^[0-9a-f]/i, + peg$c152 = { type: "class", value: "[0-9a-f]i", description: "[0-9a-f]i" }, + peg$c153 = /^[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000]/, + peg$c154 = { type: "class", value: "[\\u0020\\u00A0\\u1680\\u2000-\\u200A\\u202F\\u205F\\u3000]", description: "[\\u0020\\u00A0\\u1680\\u2000-\\u200A\\u202F\\u205F\\u3000]" }, peg$currPos = 0, peg$reportedPos = 0, @@ -600,7 +603,7 @@ module.exports = (function() { s2 = peg$parse__(); if (s2 !== peg$FAILED) { s3 = peg$currPos; - s4 = peg$parseString(); + s4 = peg$parseStringLiteral(); if (s4 !== peg$FAILED) { s5 = peg$parse__(); if (s5 !== peg$FAILED) { @@ -1219,7 +1222,7 @@ module.exports = (function() { s4 = peg$parse__(); if (s4 !== peg$FAILED) { s5 = peg$currPos; - s6 = peg$parseString(); + s6 = peg$parseStringLiteral(); if (s6 !== peg$FAILED) { s7 = peg$parse__(); if (s7 !== peg$FAILED) { @@ -1279,9 +1282,9 @@ module.exports = (function() { s0 = peg$c0; } if (s0 === peg$FAILED) { - s0 = peg$parseLiteral(); + s0 = peg$parseLiteralMatcher(); if (s0 === peg$FAILED) { - s0 = peg$parseClass(); + s0 = peg$parseCharacterClassMatcher(); if (s0 === peg$FAILED) { s0 = peg$currPos; if (input.charCodeAt(peg$currPos) === 46) { @@ -1838,7 +1841,7 @@ module.exports = (function() { s3 = []; s4 = peg$parseLetter(); if (s4 === peg$FAILED) { - s4 = peg$parseDigit(); + s4 = peg$parseDecimalDigit(); if (s4 === peg$FAILED) { if (input.charCodeAt(peg$currPos) === 95) { s4 = peg$c91; @@ -1853,7 +1856,7 @@ module.exports = (function() { s3.push(s4); s4 = peg$parseLetter(); if (s4 === peg$FAILED) { - s4 = peg$parseDigit(); + s4 = peg$parseDecimalDigit(); if (s4 === peg$FAILED) { if (input.charCodeAt(peg$currPos) === 95) { s4 = peg$c91; @@ -1889,29 +1892,65 @@ module.exports = (function() { return s0; } - function peg$parseLiteral() { + function peg$parseLetter() { + var s0; + + s0 = peg$parseLowerCaseLetter(); + if (s0 === peg$FAILED) { + s0 = peg$parseUpperCaseLetter(); + } + + return s0; + } + + function peg$parseLowerCaseLetter() { + var s0; + + if (peg$c93.test(input.charAt(peg$currPos))) { + s0 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c94); } + } + + return s0; + } + + function peg$parseUpperCaseLetter() { + var s0; + + if (peg$c95.test(input.charAt(peg$currPos))) { + s0 = input.charAt(peg$currPos); + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c96); } + } + + return s0; + } + + function peg$parseLiteralMatcher() { var s0, s1, s2; peg$silentFails++; s0 = peg$currPos; - s1 = peg$parseDoubleQuotedString(); - if (s1 === peg$FAILED) { - s1 = peg$parseSingleQuotedString(); - } + s1 = peg$parseStringLiteral(); if (s1 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 105) { - s2 = peg$c94; + s2 = peg$c98; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c95); } + if (peg$silentFails === 0) { peg$fail(peg$c99); } } if (s2 === peg$FAILED) { s2 = peg$c1; } if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c96(s1, s2); + s1 = peg$c100(s1, s2); s0 = s1; } else { peg$currPos = s0; @@ -1922,29 +1961,6 @@ module.exports = (function() { s0 = peg$c0; } peg$silentFails--; - if (s0 === peg$FAILED) { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c93); } - } - - return s0; - } - - function peg$parseString() { - var s0, s1; - - peg$silentFails++; - s0 = peg$currPos; - s1 = peg$parseDoubleQuotedString(); - if (s1 === peg$FAILED) { - s1 = peg$parseSingleQuotedString(); - } - if (s1 !== peg$FAILED) { - peg$reportedPos = s0; - s1 = peg$c98(s1); - } - s0 = s1; - peg$silentFails--; if (s0 === peg$FAILED) { s1 = peg$FAILED; if (peg$silentFails === 0) { peg$fail(peg$c97); } @@ -1953,35 +1969,36 @@ module.exports = (function() { return s0; } - function peg$parseDoubleQuotedString() { + function peg$parseStringLiteral() { var s0, s1, s2, s3; + peg$silentFails++; s0 = peg$currPos; if (input.charCodeAt(peg$currPos) === 34) { - s1 = peg$c99; + s1 = peg$c102; peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c100); } + if (peg$silentFails === 0) { peg$fail(peg$c103); } } if (s1 !== peg$FAILED) { s2 = []; - s3 = peg$parseDoubleQuotedCharacter(); + s3 = peg$parseDoubleStringCharacter(); while (s3 !== peg$FAILED) { s2.push(s3); - s3 = peg$parseDoubleQuotedCharacter(); + s3 = peg$parseDoubleStringCharacter(); } if (s2 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 34) { - s3 = peg$c99; + s3 = peg$c102; peg$currPos++; } else { s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c100); } + if (peg$silentFails === 0) { peg$fail(peg$c103); } } if (s3 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c101(s2); + s1 = peg$c104(s2); s0 = s1; } else { peg$currPos = s0; @@ -1995,53 +2012,76 @@ module.exports = (function() { peg$currPos = s0; s0 = peg$c0; } - - return s0; - } - - function peg$parseDoubleQuotedCharacter() { - var s0; - - s0 = peg$parseSimpleDoubleQuotedCharacter(); if (s0 === peg$FAILED) { - s0 = peg$parseSimpleEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseZeroEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseHexEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseUnicodeEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseEOLEscapeSequence(); - } + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 39) { + s1 = peg$c105; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c106); } + } + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$parseSingleStringCharacter(); + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$parseSingleStringCharacter(); + } + if (s2 !== peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 39) { + s3 = peg$c105; + peg$currPos++; + } else { + s3 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c106); } + } + if (s3 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c104(s2); + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$c0; } + } else { + peg$currPos = s0; + s0 = peg$c0; } + } else { + peg$currPos = s0; + s0 = peg$c0; } } + peg$silentFails--; + if (s0 === peg$FAILED) { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c101); } + } return s0; } - function peg$parseSimpleDoubleQuotedCharacter() { + function peg$parseDoubleStringCharacter() { var s0, s1, s2; s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; if (input.charCodeAt(peg$currPos) === 34) { - s2 = peg$c99; + s2 = peg$c102; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c100); } + if (peg$silentFails === 0) { peg$fail(peg$c103); } } if (s2 === peg$FAILED) { if (input.charCodeAt(peg$currPos) === 92) { - s2 = peg$c102; + s2 = peg$c107; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c103); } + if (peg$silentFails === 0) { peg$fail(peg$c108); } } if (s2 === peg$FAILED) { s2 = peg$parseLineTerminator(); @@ -2055,16 +2095,10 @@ module.exports = (function() { s1 = peg$c0; } if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c48); } - } + s2 = peg$parseSourceCharacter(); if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c104(s2); + s1 = peg$c109(); s0 = s1; } else { peg$currPos = s0; @@ -2074,39 +2108,20 @@ module.exports = (function() { peg$currPos = s0; s0 = peg$c0; } - - return s0; - } - - function peg$parseSingleQuotedString() { - var s0, s1, s2, s3; - - s0 = peg$currPos; - if (input.charCodeAt(peg$currPos) === 39) { - s1 = peg$c105; - peg$currPos++; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c106); } - } - if (s1 !== peg$FAILED) { - s2 = []; - s3 = peg$parseSingleQuotedCharacter(); - while (s3 !== peg$FAILED) { - s2.push(s3); - s3 = peg$parseSingleQuotedCharacter(); + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 92) { + s1 = peg$c107; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c108); } } - if (s2 !== peg$FAILED) { - if (input.charCodeAt(peg$currPos) === 39) { - s3 = peg$c105; - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c106); } - } - if (s3 !== peg$FAILED) { + if (s1 !== peg$FAILED) { + s2 = peg$parseEscapeSequence(); + if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c101(s2); + s1 = peg$c110(s2); s0 = s1; } else { peg$currPos = s0; @@ -2116,38 +2131,15 @@ module.exports = (function() { peg$currPos = s0; s0 = peg$c0; } - } else { - peg$currPos = s0; - s0 = peg$c0; - } - - return s0; - } - - function peg$parseSingleQuotedCharacter() { - var s0; - - s0 = peg$parseSimpleSingleQuotedCharacter(); - if (s0 === peg$FAILED) { - s0 = peg$parseSimpleEscapeSequence(); if (s0 === peg$FAILED) { - s0 = peg$parseZeroEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseHexEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseUnicodeEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseEOLEscapeSequence(); - } - } - } + s0 = peg$parseLineContinuation(); } } return s0; } - function peg$parseSimpleSingleQuotedCharacter() { + function peg$parseSingleStringCharacter() { var s0, s1, s2; s0 = peg$currPos; @@ -2162,11 +2154,11 @@ module.exports = (function() { } if (s2 === peg$FAILED) { if (input.charCodeAt(peg$currPos) === 92) { - s2 = peg$c102; + s2 = peg$c107; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c103); } + if (peg$silentFails === 0) { peg$fail(peg$c108); } } if (s2 === peg$FAILED) { s2 = peg$parseLineTerminator(); @@ -2180,16 +2172,10 @@ module.exports = (function() { s1 = peg$c0; } if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c48); } - } + s2 = peg$parseSourceCharacter(); if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c104(s2); + s1 = peg$c109(); s0 = s1; } else { peg$currPos = s0; @@ -2199,29 +2185,56 @@ module.exports = (function() { peg$currPos = s0; s0 = peg$c0; } + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 92) { + s1 = peg$c107; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c108); } + } + if (s1 !== peg$FAILED) { + s2 = peg$parseEscapeSequence(); + if (s2 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c110(s2); + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$c0; + } + } else { + peg$currPos = s0; + s0 = peg$c0; + } + if (s0 === peg$FAILED) { + s0 = peg$parseLineContinuation(); + } + } return s0; } - function peg$parseClass() { + function peg$parseCharacterClassMatcher() { var s0, s1, s2, s3, s4, s5; peg$silentFails++; s0 = peg$currPos; if (input.charCodeAt(peg$currPos) === 91) { - s1 = peg$c108; + s1 = peg$c112; peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c109); } + if (peg$silentFails === 0) { peg$fail(peg$c113); } } if (s1 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 94) { - s2 = peg$c110; + s2 = peg$c114; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c111); } + if (peg$silentFails === 0) { peg$fail(peg$c115); } } if (s2 === peg$FAILED) { s2 = peg$c1; @@ -2230,37 +2243,37 @@ module.exports = (function() { s3 = []; s4 = peg$parseClassCharacterRange(); if (s4 === peg$FAILED) { - s4 = peg$parseBracketDelimitedCharacter(); + s4 = peg$parseClassCharacter(); } while (s4 !== peg$FAILED) { s3.push(s4); s4 = peg$parseClassCharacterRange(); if (s4 === peg$FAILED) { - s4 = peg$parseBracketDelimitedCharacter(); + s4 = peg$parseClassCharacter(); } } if (s3 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 93) { - s4 = peg$c112; + s4 = peg$c116; peg$currPos++; } else { s4 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c113); } + if (peg$silentFails === 0) { peg$fail(peg$c117); } } if (s4 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 105) { - s5 = peg$c94; + s5 = peg$c98; peg$currPos++; } else { s5 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c95); } + if (peg$silentFails === 0) { peg$fail(peg$c99); } } if (s5 === peg$FAILED) { s5 = peg$c1; } if (s5 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c114(s2, s3, s5); + s1 = peg$c118(s2, s3, s5); s0 = s1; } else { peg$currPos = s0; @@ -2285,7 +2298,7 @@ module.exports = (function() { peg$silentFails--; if (s0 === peg$FAILED) { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c107); } + if (peg$silentFails === 0) { peg$fail(peg$c111); } } return s0; @@ -2295,20 +2308,20 @@ module.exports = (function() { var s0, s1, s2, s3; s0 = peg$currPos; - s1 = peg$parseBracketDelimitedCharacter(); + s1 = peg$parseClassCharacter(); if (s1 !== peg$FAILED) { if (input.charCodeAt(peg$currPos) === 45) { - s2 = peg$c115; + s2 = peg$c119; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c116); } + if (peg$silentFails === 0) { peg$fail(peg$c120); } } if (s2 !== peg$FAILED) { - s3 = peg$parseBracketDelimitedCharacter(); + s3 = peg$parseClassCharacter(); if (s3 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c117(s1, s3); + s1 = peg$c121(s1, s3); s0 = s1; } else { peg$currPos = s0; @@ -2326,49 +2339,26 @@ module.exports = (function() { return s0; } - function peg$parseBracketDelimitedCharacter() { - var s0; - - s0 = peg$parseSimpleBracketDelimitedCharacter(); - if (s0 === peg$FAILED) { - s0 = peg$parseSimpleEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseZeroEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseHexEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseUnicodeEscapeSequence(); - if (s0 === peg$FAILED) { - s0 = peg$parseEOLEscapeSequence(); - } - } - } - } - } - - return s0; - } - - function peg$parseSimpleBracketDelimitedCharacter() { + function peg$parseClassCharacter() { var s0, s1, s2; s0 = peg$currPos; s1 = peg$currPos; peg$silentFails++; if (input.charCodeAt(peg$currPos) === 93) { - s2 = peg$c112; + s2 = peg$c116; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c113); } + if (peg$silentFails === 0) { peg$fail(peg$c117); } } if (s2 === peg$FAILED) { if (input.charCodeAt(peg$currPos) === 92) { - s2 = peg$c102; + s2 = peg$c107; peg$currPos++; } else { s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c103); } + if (peg$silentFails === 0) { peg$fail(peg$c108); } } if (s2 === peg$FAILED) { s2 = peg$parseLineTerminator(); @@ -2382,16 +2372,10 @@ module.exports = (function() { s1 = peg$c0; } if (s1 !== peg$FAILED) { - if (input.length > peg$currPos) { - s2 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s2 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c48); } - } + s2 = peg$parseSourceCharacter(); if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c104(s2); + s1 = peg$c109(); s0 = s1; } else { peg$currPos = s0; @@ -2401,64 +2385,93 @@ module.exports = (function() { peg$currPos = s0; s0 = peg$c0; } + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 92) { + s1 = peg$c107; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c108); } + } + if (s1 !== peg$FAILED) { + s2 = peg$parseEscapeSequence(); + if (s2 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c110(s2); + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$c0; + } + } else { + peg$currPos = s0; + s0 = peg$c0; + } + if (s0 === peg$FAILED) { + s0 = peg$parseLineContinuation(); + } + } return s0; } - function peg$parseSimpleEscapeSequence() { - var s0, s1, s2, s3; + function peg$parseLineContinuation() { + var s0, s1, s2; s0 = peg$currPos; if (input.charCodeAt(peg$currPos) === 92) { - s1 = peg$c102; + s1 = peg$c107; peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c103); } + if (peg$silentFails === 0) { peg$fail(peg$c108); } } if (s1 !== peg$FAILED) { - s2 = peg$currPos; - peg$silentFails++; - s3 = peg$parseDigit(); - if (s3 === peg$FAILED) { - if (input.charCodeAt(peg$currPos) === 120) { - s3 = peg$c118; - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c119); } - } - if (s3 === peg$FAILED) { - if (input.charCodeAt(peg$currPos) === 117) { - s3 = peg$c120; - peg$currPos++; - } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c121); } - } - if (s3 === peg$FAILED) { - s3 = peg$parseLineTerminator(); - } - } + s2 = peg$parseLineTerminatorSequence(); + if (s2 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c122(); + s0 = s1; + } else { + peg$currPos = s0; + s0 = peg$c0; } - peg$silentFails--; - if (s3 === peg$FAILED) { - s2 = peg$c38; + } else { + peg$currPos = s0; + s0 = peg$c0; + } + + return s0; + } + + function peg$parseEscapeSequence() { + var s0, s1, s2, s3; + + s0 = peg$parseCharacterEscapeSequence(); + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 48) { + s1 = peg$c123; + peg$currPos++; } else { - peg$currPos = s2; - s2 = peg$c0; + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c124); } } - if (s2 !== peg$FAILED) { - if (input.length > peg$currPos) { - s3 = input.charAt(peg$currPos); - peg$currPos++; + if (s1 !== peg$FAILED) { + s2 = peg$currPos; + peg$silentFails++; + s3 = peg$parseDecimalDigit(); + peg$silentFails--; + if (s3 === peg$FAILED) { + s2 = peg$c38; } else { - s3 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c48); } + peg$currPos = s2; + s2 = peg$c0; } - if (s3 !== peg$FAILED) { + if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c122(s3); + s1 = peg$c125(); s0 = s1; } else { peg$currPos = s0; @@ -2468,39 +2481,172 @@ module.exports = (function() { peg$currPos = s0; s0 = peg$c0; } + if (s0 === peg$FAILED) { + s0 = peg$parseHexEscapeSequence(); + if (s0 === peg$FAILED) { + s0 = peg$parseUnicodeEscapeSequence(); + } + } + } + + return s0; + } + + function peg$parseCharacterEscapeSequence() { + var s0; + + s0 = peg$parseSingleEscapeCharacter(); + if (s0 === peg$FAILED) { + s0 = peg$parseNonEscapeCharacter(); + } + + return s0; + } + + function peg$parseSingleEscapeCharacter() { + var s0, s1; + + if (input.charCodeAt(peg$currPos) === 39) { + s0 = peg$c105; + peg$currPos++; } else { - peg$currPos = s0; - s0 = peg$c0; + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c106); } + } + if (s0 === peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 34) { + s0 = peg$c102; + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c103); } + } + if (s0 === peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 92) { + s0 = peg$c107; + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c108); } + } + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 98) { + s1 = peg$c126; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c127); } + } + if (s1 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c128(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 102) { + s1 = peg$c129; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c130); } + } + if (s1 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c131(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 110) { + s1 = peg$c132; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c133); } + } + if (s1 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c134(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 114) { + s1 = peg$c135; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c136); } + } + if (s1 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c137(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 116) { + s1 = peg$c138; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c139); } + } + if (s1 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c140(); + } + s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + if (input.charCodeAt(peg$currPos) === 118) { + s1 = peg$c141; + peg$currPos++; + } else { + s1 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c142); } + } + if (s1 !== peg$FAILED) { + peg$reportedPos = s0; + s1 = peg$c143(); + } + s0 = s1; + } + } + } + } + } + } + } } return s0; } - function peg$parseZeroEscapeSequence() { - var s0, s1, s2, s3; + function peg$parseNonEscapeCharacter() { + var s0, s1, s2; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c123) { - s1 = peg$c123; - peg$currPos += 2; + s1 = peg$currPos; + peg$silentFails++; + s2 = peg$parseEscapeCharacter(); + if (s2 === peg$FAILED) { + s2 = peg$parseLineTerminator(); + } + peg$silentFails--; + if (s2 === peg$FAILED) { + s1 = peg$c38; } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c124); } + peg$currPos = s1; + s1 = peg$c0; } if (s1 !== peg$FAILED) { - s2 = peg$currPos; - peg$silentFails++; - s3 = peg$parseDigit(); - peg$silentFails--; - if (s3 === peg$FAILED) { - s2 = peg$c38; - } else { - peg$currPos = s2; - s2 = peg$c0; - } + s2 = peg$parseSourceCharacter(); if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c125(); + s1 = peg$c109(); s0 = s1; } else { peg$currPos = s0; @@ -2514,16 +2660,45 @@ module.exports = (function() { return s0; } + function peg$parseEscapeCharacter() { + var s0; + + s0 = peg$parseSingleEscapeCharacter(); + if (s0 === peg$FAILED) { + s0 = peg$parseDecimalDigit(); + if (s0 === peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 120) { + s0 = peg$c144; + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c145); } + } + if (s0 === peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 117) { + s0 = peg$c146; + peg$currPos++; + } else { + s0 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$c147); } + } + } + } + } + + return s0; + } + function peg$parseHexEscapeSequence() { var s0, s1, s2, s3, s4, s5; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c126) { - s1 = peg$c126; - peg$currPos += 2; + if (input.charCodeAt(peg$currPos) === 120) { + s1 = peg$c144; + peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c127); } + if (peg$silentFails === 0) { peg$fail(peg$c145); } } if (s1 !== peg$FAILED) { s2 = peg$currPos; @@ -2548,7 +2723,7 @@ module.exports = (function() { s2 = s3; if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c128(s2); + s1 = peg$c148(s2); s0 = s1; } else { peg$currPos = s0; @@ -2566,12 +2741,12 @@ module.exports = (function() { var s0, s1, s2, s3, s4, s5, s6, s7; s0 = peg$currPos; - if (input.substr(peg$currPos, 2) === peg$c129) { - s1 = peg$c129; - peg$currPos += 2; + if (input.charCodeAt(peg$currPos) === 117) { + s1 = peg$c146; + peg$currPos++; } else { s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c130); } + if (peg$silentFails === 0) { peg$fail(peg$c147); } } if (s1 !== peg$FAILED) { s2 = peg$currPos; @@ -2608,36 +2783,7 @@ module.exports = (function() { s2 = s3; if (s2 !== peg$FAILED) { peg$reportedPos = s0; - s1 = peg$c128(s2); - s0 = s1; - } else { - peg$currPos = s0; - s0 = peg$c0; - } - } else { - peg$currPos = s0; - s0 = peg$c0; - } - - return s0; - } - - function peg$parseEOLEscapeSequence() { - var s0, s1, s2; - - s0 = peg$currPos; - if (input.charCodeAt(peg$currPos) === 92) { - s1 = peg$c102; - peg$currPos++; - } else { - s1 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c103); } - } - if (s1 !== peg$FAILED) { - s2 = peg$parseLineTerminatorSequence(); - if (s2 !== peg$FAILED) { - peg$reportedPos = s0; - s1 = peg$c131(s2); + s1 = peg$c148(s2); s0 = s1; } else { peg$currPos = s0; @@ -2651,15 +2797,15 @@ module.exports = (function() { return s0; } - function peg$parseDigit() { + function peg$parseDecimalDigit() { var s0; - if (peg$c132.test(input.charAt(peg$currPos))) { + if (peg$c149.test(input.charAt(peg$currPos))) { s0 = input.charAt(peg$currPos); peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c133); } + if (peg$silentFails === 0) { peg$fail(peg$c150); } } return s0; @@ -2668,51 +2814,12 @@ module.exports = (function() { function peg$parseHexDigit() { var s0; - if (peg$c134.test(input.charAt(peg$currPos))) { - s0 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c135); } - } - - return s0; - } - - function peg$parseLetter() { - var s0; - - s0 = peg$parseLowerCaseLetter(); - if (s0 === peg$FAILED) { - s0 = peg$parseUpperCaseLetter(); - } - - return s0; - } - - function peg$parseLowerCaseLetter() { - var s0; - - if (peg$c136.test(input.charAt(peg$currPos))) { - s0 = input.charAt(peg$currPos); - peg$currPos++; - } else { - s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c137); } - } - - return s0; - } - - function peg$parseUpperCaseLetter() { - var s0; - - if (peg$c138.test(input.charAt(peg$currPos))) { + if (peg$c151.test(input.charAt(peg$currPos))) { s0 = input.charAt(peg$currPos); peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c139); } + if (peg$silentFails === 0) { peg$fail(peg$c152); } } return s0; @@ -2721,12 +2828,12 @@ module.exports = (function() { function peg$parseZs() { var s0; - if (peg$c140.test(input.charAt(peg$currPos))) { + if (peg$c153.test(input.charAt(peg$currPos))) { s0 = input.charAt(peg$currPos); peg$currPos++; } else { s0 = peg$FAILED; - if (peg$silentFails === 0) { peg$fail(peg$c141); } + if (peg$silentFails === 0) { peg$fail(peg$c154); } } return s0; diff --git a/spec/parser.spec.js b/spec/parser.spec.js index 5a69b0d..8be8cb4 100644 --- a/spec/parser.spec.js +++ b/spec/parser.spec.js @@ -428,16 +428,16 @@ describe("PEG.js grammar parser", function() { expect('start = abcd').toParseAs(ruleRefGrammar("abcd")); }); - /* Canonical Literal is "\"abcd\"". */ - it("parses Literal", function() { - expect('start = "abcd"' ).toParseAs(literalGrammar("abcd")); - expect("start = 'abcd'" ).toParseAs(literalGrammar("abcd")); + /* Trivial character class rules are not tested. */ + /* Canonical LiteralMatcher is "\"abcd\"". */ + it("parses LiteralMatcher", function() { + expect('start = "abcd"' ).toParseAs(literalGrammar("abcd")); expect('start = "abcd"i').toParseAs(literalGrammar("abcd", true)); }); - /* Canonical String is "\"abcd\"". */ - it("parses String", function() { + /* Canonical StringLiteral is "\"abcd\"". */ + it("parses StringLiteral", function() { var grammar = oneRuleGrammar({ type: "named", name: "abcd", @@ -448,67 +448,42 @@ describe("PEG.js grammar parser", function() { expect('start \'abcd\' = "abcd"').toParseAs(grammar); }); - /* Canonical DoubleQuotedString is "\"abcd\"". */ - it("parses DoubleQuotedString", function() { - expect('start = ""' ).toParseAs(literalGrammar("")); - expect('start = "a"' ).toParseAs(literalGrammar("a")); - expect('start = "abc"').toParseAs(literalGrammar("abc")); - }); - - /* Canonical DoubleQuotedCharacter is "a". */ - it("parses DoubleQuotedCharacter", function() { - expect('start = "a"' ).toParseAs(literalGrammar("a")); - expect('start = "\\n"' ).toParseAs(literalGrammar("\n")); - expect('start = "\\0"' ).toParseAs(literalGrammar("\x00")); - expect('start = "\\xFF"' ).toParseAs(literalGrammar("\xFF")); - expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF")); - expect('start = "\\\n"' ).toParseAs(literalGrammar("")); - }); - - /* Canonical SimpleDoubleQuotedCharacter is "a". */ - it("parses SimpleDoubleQuotedCharacter", function() { - expect('start = "a"').toParseAs(literalGrammar("a")); + /* Canonical DoubleStringCharacter is "a". */ + it("parses DoubleStringCharacter", function() { + expect('start = "a"' ).toParseAs(literalGrammar("a")); + expect('start = "\\n"' ).toParseAs(literalGrammar("\n")); + expect('start = "\\\n"').toParseAs(literalGrammar("")); expect('start = """' ).toFailToParse(); expect('start = "\\"').toFailToParse(); expect('start = "\n"').toFailToParse(); }); - /* Canonical SingleQuotedString is "'abcd'". */ - it("parses SingleQuotedString", function() { - expect("start = ''" ).toParseAs(literalGrammar("")); - expect("start = 'a'" ).toParseAs(literalGrammar("a")); - expect("start = 'abc'").toParseAs(literalGrammar("abc")); - }); - - /* Canonical SingleQuotedCharacter is "a". */ - it("parses SingleQuotedCharacter", function() { - expect("start = 'a'" ).toParseAs(literalGrammar("a")); - expect("start = '\\n'" ).toParseAs(literalGrammar("\n")); - expect("start = '\\0'" ).toParseAs(literalGrammar("\x00")); - expect("start = '\\xFF'" ).toParseAs(literalGrammar("\xFF")); - expect("start = '\\uFFFF'").toParseAs(literalGrammar("\uFFFF")); - expect("start = '\\\n'" ).toParseAs(literalGrammar("")); - }); - - /* Canonical SimpleSingleQuotedCharacter is "a". */ - it("parses SimpleSingleQuotedCharacter", function() { - expect("start = 'a'").toParseAs(literalGrammar("a")); + /* Canonical SingleStringCharacter is "a". */ + it("parses SingleStringCharacter", function() { + expect("start = 'a'" ).toParseAs(literalGrammar("a")); + expect("start = '\\n'" ).toParseAs(literalGrammar("\n")); + expect("start = '\\\n'").toParseAs(literalGrammar("")); expect("start = '''" ).toFailToParse(); expect("start = '\\'").toFailToParse(); expect("start = '\n'").toFailToParse(); }); - /* Canonical Class is "[a-d]". */ - it("parses Class", function() { - expect('start = []' ).toParseAs(classGrammar([], "[]")); - expect('start = [a-d]' ).toParseAs(classGrammar([["a", "d"]], "[a-d]")); - expect('start = [a]' ).toParseAs(classGrammar(["a"], "[a]")); + /* Canonical CharacterClassMatcher is "[a-d]". */ + it("parses CharacterClassMatcher", function() { + expect('start = []').toParseAs( + classGrammar([], "[]") + ); + expect('start = [a-d]').toParseAs( + classGrammar([["a", "d"]], "[a-d]") + ); + expect('start = [a]').toParseAs( + classGrammar(["a"], "[a]") + ); expect('start = [a-de-hi-l]').toParseAs( classGrammar([["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]") ); - expect('start = [^a-d]').toParseAs( classGrammar([["a", "d"]], "[^a-d]", true, false) ); @@ -520,8 +495,8 @@ describe("PEG.js grammar parser", function() { /* Canonical ClassCharacterRange is "a-d". */ it("parses ClassCharacterRange", function() { expect('start = [a-d]').toParseAs(classGrammar([["a", "d"]], "[a-d]")); - expect('start = [a-a]').toParseAs(classGrammar([["a", "a"]], "[a-a]")); + expect('start = [a-a]').toParseAs(classGrammar([["a", "a"]], "[a-a]")); expect('start = [b-a]').toFailToParse({ message: "Invalid character range: b-a." }); @@ -529,67 +504,67 @@ describe("PEG.js grammar parser", function() { /* Canonical ClassCharacter is "a". */ it("parses ClassCharacter", function() { - expect('start = [a]').toParseAs(classGrammar(["a"], "[a]")); + expect('start = [a]' ).toParseAs(classGrammar(["a"], "[a]")); + expect('start = [\\n]' ).toParseAs(classGrammar(["\n"], "[\\n]")); + expect('start = [\\\n]').toParseAs(classGrammar([''], "[\\\n]")); + + expect('start = []]' ).toFailToParse(); + expect('start = [\\]').toFailToParse(); + expect('start = [\n]').toFailToParse(); }); - /* Canonical BracketDelimitedCharacter is "a". */ - it("parses BracketDelimitedCharacter", function() { - expect('start = [a]' ).toParseAs(classGrammar(["a"], "[a]")); - expect('start = [\\n]' ).toParseAs(classGrammar(["\n"], "[\\n]")); - expect('start = [\\0]' ).toParseAs(classGrammar(["\x00"], "[\\0]")); - expect('start = [\\xFF]' ).toParseAs(classGrammar(["\xFF"], "[\\xFF]")); - expect('start = [\\uFFFF]').toParseAs(classGrammar(["\uFFFF"], "[\\uFFFF]")); - expect('start = [\\\n]' ).toParseAs(classGrammar([""], "[\\\n]")); + /* Canonical LineContinuation is "\\\n". */ + it("parses LineContinuation", function() { + expect('start = "\\\r\n"').toParseAs(literalGrammar("")); }); - /* Canonical SimpleBracketDelimiedCharacter is "a". */ - it("parses SimpleBracketDelimitedCharacter", function() { - expect('start = [a]').toParseAs(classGrammar(["a"], "[a]")); + /* Canonical EscapeSequence is "n". */ + it("parses EscapeSequence", function() { + expect('start = "\\n"' ).toParseAs(literalGrammar("\n")); + expect('start = "\\0"' ).toParseAs(literalGrammar("\x00")); + expect('start = "\\xFF"' ).toParseAs(literalGrammar("\xFF")); + expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF")); - expect('start = []]' ).toFailToParse(); - expect('start = [\\]').toFailToParse(); - expect('start = [\n]').toFailToParse(); + expect('start = "\\09"').toFailToParse(); }); - /* Canonical SimpleEscapeSequence is "\\n". */ - it("parses SimpleEscapeSequence", function() { - expect('start = "\\b"').toParseAs(literalGrammar("\b")); - expect('start = "\\f"').toParseAs(literalGrammar("\f")); + /* Canonical CharacterEscapeSequence is "n". */ + it("parses CharacterEscapeSequence", function() { expect('start = "\\n"').toParseAs(literalGrammar("\n")); - expect('start = "\\r"').toParseAs(literalGrammar("\r")); - expect('start = "\\t"').toParseAs(literalGrammar("\t")); - expect('start = "\\v"').toParseAs(literalGrammar("\x0B")); // no "\v" in IE expect('start = "\\a"').toParseAs(literalGrammar("a")); - - expect('start = "\\1"').toFailToParse(); - expect('start = "\\x"').toFailToParse(); - expect('start = "\\u"').toFailToParse(); }); - /* Canonical ZeroEscapeSequence is "\\0". */ - it("parses ZeroEscapeSequence", function() { - expect('start = "\\0"').toParseAs(literalGrammar("\x00")); + /* Canonical SingleEscapeCharacter is "n". */ + it("parses SingleEscapeCharacter", function() { + expect('start = "\\\'"').toParseAs(literalGrammar("'")); + expect('start = "\\""' ).toParseAs(literalGrammar('"')); + expect('start = "\\\\"').toParseAs(literalGrammar("\\")); + expect('start = "\\b"' ).toParseAs(literalGrammar("\b")); + expect('start = "\\f"' ).toParseAs(literalGrammar("\f")); + expect('start = "\\n"' ).toParseAs(literalGrammar("\n")); + expect('start = "\\r"' ).toParseAs(literalGrammar("\r")); + expect('start = "\\t"' ).toParseAs(literalGrammar("\t")); + expect('start = "\\v"' ).toParseAs(literalGrammar("\x0B")); // no "\v" in IE + }); - expect('start = "\\00"').toFailToParse(); - expect('start = "\\09"').toFailToParse(); + /* Canonical NonEscapeCharacter is "a". */ + it("parses NonEscapeCharacter", function() { + expect('start = "\\a"').toParseAs(literalGrammar("a")); }); - /* Canonical HexEscapeSequence is "\\xFF". */ + /* The EscapeCharacter rule is not tested. */ + + /* Canonical HexEscapeSequence is "xFF". */ it("parses HexEscapeSequence", function() { expect('start = "\\xFF"').toParseAs(literalGrammar("\xFF")); }); - /* Canonical UnicodeEscapeSequence is "\\uFFFF". */ + /* Canonical UnicodeEscapeSequence is "uFFFF". */ it("parses UnicodeEscapeSequence", function() { expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF")); }); - /* Canonical EOLEscapeSequence is "\\\n". */ - it("parses EOLEscapeSequence", function() { - expect('start = "\\\r\n"').toParseAs(literalGrammar("")); - }); - - /* Trivial character class rules are not tested. */ + /* Digit rules are not tested. */ /* Unicode character category rules are not tested. */ diff --git a/src/parser.pegjs b/src/parser.pegjs index 62cab17..ee9dc70 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -50,7 +50,7 @@ Initializer Rule = name:Identifier __ - displayName:(String __)? + displayName:(StringLiteral __)? "=" __ expression:Expression (__ ";")? { return { @@ -158,14 +158,14 @@ Suffixed / Primary Primary - = name:Identifier !(__ (String __)? "=") { + = name:Identifier !(__ (StringLiteral __)? "=") { return { type: "rule_ref", name: name }; } - / Literal - / Class + / LiteralMatcher + / CharacterClassMatcher / "." { return { type: "any" }; } / "(" __ expression:Expression __ ")" { return expression; } @@ -216,124 +216,117 @@ NonBraceCharacter = [^{}] Identifier "identifier" - = $((Letter / "_") (Letter / Digit / "_")*) + = $((Letter / "_") (Letter / DecimalDigit / "_")*) -Literal "literal" - = value:(DoubleQuotedString / SingleQuotedString) flags:"i"? { - return { - type: "literal", - value: value, - ignoreCase: flags === "i" - }; - } - -String "string" - = string:(DoubleQuotedString / SingleQuotedString) { return string; } - -DoubleQuotedString - = '"' chars:DoubleQuotedCharacter* '"' { return chars.join(""); } - -DoubleQuotedCharacter - = SimpleDoubleQuotedCharacter - / SimpleEscapeSequence - / ZeroEscapeSequence - / HexEscapeSequence - / UnicodeEscapeSequence - / EOLEscapeSequence - -SimpleDoubleQuotedCharacter - = !('"' / "\\" / LineTerminator) char_:. { return char_; } +Letter + = LowerCaseLetter + / UpperCaseLetter -SingleQuotedString - = "'" chars:SingleQuotedCharacter* "'" { return chars.join(""); } +LowerCaseLetter + = [a-z] -SingleQuotedCharacter - = SimpleSingleQuotedCharacter - / SimpleEscapeSequence - / ZeroEscapeSequence - / HexEscapeSequence - / UnicodeEscapeSequence - / EOLEscapeSequence +UpperCaseLetter + = [A-Z] -SimpleSingleQuotedCharacter - = !("'" / "\\" / LineTerminator) char_:. { return char_; } +LiteralMatcher "literal" + = value:StringLiteral ignoreCase:"i"? { + return { type: "literal", value: value, ignoreCase: ignoreCase !== null }; + } -Class "character class" - = "[" inverted:"^"? parts:(ClassCharacterRange / ClassCharacter)* "]" flags:"i"? { +StringLiteral "string" + = '"' chars:DoubleStringCharacter* '"' { return chars.join(""); } + / "'" chars:SingleStringCharacter* "'" { return chars.join(""); } + +DoubleStringCharacter + = !('"' / "\\" / LineTerminator) SourceCharacter { return text(); } + / "\\" sequence:EscapeSequence { return sequence; } + / LineContinuation + +SingleStringCharacter + = !("'" / "\\" / LineTerminator) SourceCharacter { return text(); } + / "\\" sequence:EscapeSequence { return sequence; } + / LineContinuation + +CharacterClassMatcher "character class" + = "[" + inverted:"^"? + parts:(ClassCharacterRange / ClassCharacter)* + "]" + ignoreCase:"i"? + { return { type: "class", parts: parts, - rawText: text().replace(/\s+$/, ""), - inverted: inverted === "^", - ignoreCase: flags === "i" + inverted: inverted !== null, + ignoreCase: ignoreCase !== null, + rawText: text() }; } ClassCharacterRange = begin:ClassCharacter "-" end:ClassCharacter { if (begin.charCodeAt(0) > end.charCodeAt(0)) { - error("Invalid character range: " + text() + "."); + error( + "Invalid character range: " + text() + "." + ); } return [begin, end]; } ClassCharacter - = BracketDelimitedCharacter + = !("]" / "\\" / LineTerminator) SourceCharacter { return text(); } + / "\\" sequence:EscapeSequence { return sequence; } + / LineContinuation + +LineContinuation + = "\\" LineTerminatorSequence { return ""; } -BracketDelimitedCharacter - = SimpleBracketDelimitedCharacter - / SimpleEscapeSequence - / ZeroEscapeSequence +EscapeSequence + = CharacterEscapeSequence + / "0" !DecimalDigit { return "\0"; } / HexEscapeSequence / UnicodeEscapeSequence - / EOLEscapeSequence - -SimpleBracketDelimitedCharacter - = !("]" / "\\" / LineTerminator) char_:. { return char_; } - -SimpleEscapeSequence - = "\\" !(Digit / "x" / "u" / LineTerminator) char_:. { - return char_ - .replace("b", "\b") - .replace("f", "\f") - .replace("n", "\n") - .replace("r", "\r") - .replace("t", "\t") - .replace("v", "\x0B"); // IE does not recognize "\v". - } -ZeroEscapeSequence - = "\\0" !Digit { return "\x00"; } +CharacterEscapeSequence + = SingleEscapeCharacter + / NonEscapeCharacter + +SingleEscapeCharacter + = "'" + / '"' + / "\\" + / "b" { return "\b"; } + / "f" { return "\f"; } + / "n" { return "\n"; } + / "r" { return "\r"; } + / "t" { return "\t"; } + / "v" { return "\x0B"; } // IE does not recognize "\v". + +NonEscapeCharacter + = !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); } + +EscapeCharacter + = SingleEscapeCharacter + / DecimalDigit + / "x" + / "u" HexEscapeSequence - = "\\x" digits:$(HexDigit HexDigit) { + = "x" digits:$(HexDigit HexDigit) { return String.fromCharCode(parseInt(digits, 16)); } UnicodeEscapeSequence - = "\\u" digits:$(HexDigit HexDigit HexDigit HexDigit) { + = "u" digits:$(HexDigit HexDigit HexDigit HexDigit) { return String.fromCharCode(parseInt(digits, 16)); } -EOLEscapeSequence - = "\\" eol:LineTerminatorSequence { return ""; } - -Digit +DecimalDigit = [0-9] HexDigit - = [0-9a-fA-F] - -Letter - = LowerCaseLetter - / UpperCaseLetter - -LowerCaseLetter - = [a-z] - -UpperCaseLetter - = [A-Z] + = [0-9a-f]i /* * Unicode Character Categories