From c04af99df80c1e513c121e8de4f3b2c494b583d5 Mon Sep 17 00:00:00 2001 From: David Majda Date: Fri, 30 Sep 2011 11:45:36 +0200 Subject: [PATCH] Implement case-insensitive class matching --- README.md | 2 +- src/emitter.js | 2 +- src/parser.js | 37 +++++++++++++++++------- src/parser.pegjs | 14 +++++---- test/compiler-test.js | 66 +++++++++++++++++++++++++++++++++---------- test/parser-test.js | 17 +++++++---- 6 files changed, 99 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 5f023d1..489a506 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ Match exactly one character and return it as a string. #### [*characters*] -Match one character from a set and return it as a string. The characters in the list can be escaped in exactly the same way as in JavaScript string. The list of characters can also contain ranges (e.g. `[a-z]` means “all lowercase letters”). Preceding the characters with `^` inverts the matched set (e.g. `[^a-z]` means “all character but lowercase letters”). +Match one character from a set and return it as a string. The characters in the list can be escaped in exactly the same way as in JavaScript string. The list of characters can also contain ranges (e.g. `[a-z]` means “all lowercase letters”). Preceding the characters with `^` inverts the matched set (e.g. `[^a-z]` means “all character but lowercase letters”). Appending `i` right after the right bracket makes the match case-insensitive. #### *rule* diff --git a/src/emitter.js b/src/emitter.js index 3f7d9e9..5b4eaea 100644 --- a/src/emitter.js +++ b/src/emitter.js @@ -911,7 +911,7 @@ PEG.compiler.emitter = function(ast) { + quoteForRegexpClass(part[1]) : quoteForRegexpClass(part); }).join('') - + ']/'; + + ']/' + (node.ignoreCase ? 
'i' : ''); } else { /* * Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so diff --git a/src/parser.js b/src/parser.js index cc9e68c..9b2a7c5 100644 --- a/src/parser.js +++ b/src/parser.js @@ -2272,7 +2272,7 @@ PEG.parser = (function(){ return cachedResult.result; } - var result0, result1, result2, result3, result4, result5; + var result0, result1, result2, result3, result4, result5, result6; var pos0, pos1, pos2; reportFailures++; @@ -2322,9 +2322,24 @@ PEG.parser = (function(){ } } if (result3 !== null) { - result4 = parse___(); + if (input.charCodeAt(pos) === 105) { + result4 = "i"; + pos += 1; + } else { + result4 = null; + if (reportFailures === 0) { + matchFailed("\"i\""); + } + } + result4 = result4 !== null ? result4 : ""; if (result4 !== null) { - result0 = [result0, result1, result2, result3, result4]; + result5 = parse___(); + if (result5 !== null) { + result0 = [result0, result1, result2, result3, result4, result5]; + } else { + result0 = null; + pos = pos1; + } } else { result0 = null; pos = pos1; @@ -2346,21 +2361,23 @@ PEG.parser = (function(){ pos = pos1; } if (result0 !== null) { - result0 = (function(inverted, parts) { + result0 = (function(inverted, parts, flags) { var partsConverted = map(parts, function(part) { return part.data; }); var rawText = "[" + inverted + map(parts, function(part) { return part.rawText; }).join("") - + "]"; + + "]" + + flags; return { - type: "class", - inverted: inverted === "^", - parts: partsConverted, + type: "class", + inverted: inverted === "^", + ignoreCase: flags === "i", + parts: partsConverted, // FIXME: Get the raw text from the input directly. 
- rawText: rawText + rawText: rawText }; - })(result0[1], result0[2]); + })(result0[1], result0[2], result0[4]); } if (result0 === null) { pos = pos0; diff --git a/src/parser.pegjs b/src/parser.pegjs index 5aca67a..a258c7c 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -235,19 +235,21 @@ simpleSingleQuotedCharacter = !("'" / "\\" / eolChar) char_:. { return char_; } class "character class" - = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" __ { + = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? __ { var partsConverted = map(parts, function(part) { return part.data; }); var rawText = "[" + inverted + map(parts, function(part) { return part.rawText; }).join("") - + "]"; + + "]" + + flags; return { - type: "class", - inverted: inverted === "^", - parts: partsConverted, + type: "class", + inverted: inverted === "^", + ignoreCase: flags === "i", + parts: partsConverted, // FIXME: Get the raw text from the input directly. - rawText: rawText + rawText: rawText }; } diff --git a/test/compiler-test.js b/test/compiler-test.js index ec2fa39..80693a3 100644 --- a/test/compiler-test.js +++ b/test/compiler-test.js @@ -246,26 +246,62 @@ test("classes", function() { doesNotParse(emptyClassParser, "a"); doesNotParse(emptyClassParser, "ab"); - var nonEmptyClassParser = PEG.buildParser('start = [ab-d]'); - parses(nonEmptyClassParser, "a", "a"); - parses(nonEmptyClassParser, "b", "b"); - parses(nonEmptyClassParser, "c", "c"); - parses(nonEmptyClassParser, "d", "d"); - doesNotParse(nonEmptyClassParser, ""); - doesNotParse(nonEmptyClassParser, "ab"); - var invertedEmptyClassParser = PEG.buildParser('start = [^]'); doesNotParse(invertedEmptyClassParser, ""); parses(invertedEmptyClassParser, "a", "a"); doesNotParse(invertedEmptyClassParser, "ab"); - var invertedNonEmptyClassParser = PEG.buildParser('start = [^ab-d]'); - doesNotParse(invertedNonEmptyClassParser, "a", "a"); - doesNotParse(invertedNonEmptyClassParser, "b", "b"); - 
doesNotParse(invertedNonEmptyClassParser, "c", "c"); - doesNotParse(invertedNonEmptyClassParser, "d", "d"); - doesNotParse(invertedNonEmptyClassParser, ""); - doesNotParse(invertedNonEmptyClassParser, "ab"); + var nonEmptyCaseSensitiveClassParser = PEG.buildParser('start = [ab-d]'); + parses(nonEmptyCaseSensitiveClassParser, "a", "a"); + parses(nonEmptyCaseSensitiveClassParser, "b", "b"); + parses(nonEmptyCaseSensitiveClassParser, "c", "c"); + parses(nonEmptyCaseSensitiveClassParser, "d", "d"); + doesNotParse(nonEmptyCaseSensitiveClassParser, ""); + doesNotParse(nonEmptyCaseSensitiveClassParser, "A"); + doesNotParse(nonEmptyCaseSensitiveClassParser, "B"); + doesNotParse(nonEmptyCaseSensitiveClassParser, "C"); + doesNotParse(nonEmptyCaseSensitiveClassParser, "D"); + doesNotParse(nonEmptyCaseSensitiveClassParser, "e"); + doesNotParse(nonEmptyCaseSensitiveClassParser, "ab"); + + var invertedNonEmptyCaseSensitiveClassParser = PEG.buildParser('start = [^ab-d]'); + parses(invertedNonEmptyCaseSensitiveClassParser, "A", "A"); + parses(invertedNonEmptyCaseSensitiveClassParser, "B", "B"); + parses(invertedNonEmptyCaseSensitiveClassParser, "C", "C"); + parses(invertedNonEmptyCaseSensitiveClassParser, "D", "D"); + parses(invertedNonEmptyCaseSensitiveClassParser, "e", "e"); + doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "a", "a"); + doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "b", "b"); + doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "c", "c"); + doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "d", "d"); + doesNotParse(invertedNonEmptyCaseSensitiveClassParser, ""); + doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "ab"); + + var nonEmptyCaseInsensitiveClassParser = PEG.buildParser('start = [ab-d]i'); + parses(nonEmptyCaseInsensitiveClassParser, "a", "a"); + parses(nonEmptyCaseInsensitiveClassParser, "b", "b"); + parses(nonEmptyCaseInsensitiveClassParser, "c", "c"); + parses(nonEmptyCaseInsensitiveClassParser, "d", "d"); + 
parses(nonEmptyCaseInsensitiveClassParser, "A", "A"); + parses(nonEmptyCaseInsensitiveClassParser, "B", "B"); + parses(nonEmptyCaseInsensitiveClassParser, "C", "C"); + parses(nonEmptyCaseInsensitiveClassParser, "D", "D"); + doesNotParse(nonEmptyCaseInsensitiveClassParser, ""); + doesNotParse(nonEmptyCaseInsensitiveClassParser, "e"); + doesNotParse(nonEmptyCaseInsensitiveClassParser, "ab"); + + var invertedNonEmptyCaseInsensitiveClassParser = PEG.buildParser('start = [^ab-d]i'); + parses(invertedNonEmptyCaseInsensitiveClassParser, "e", "e"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "a", "a"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "b", "b"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "c", "c"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "d", "d"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "A", "A"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "B", "B"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "C", "C"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "D", "D"); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, ""); + doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "ab"); /* * Test that the parsing position moves forward after successful parsing of diff --git a/test/parser-test.js b/test/parser-test.js index 8cb1753..20c86a4 100644 --- a/test/parser-test.js +++ b/test/parser-test.js @@ -95,12 +95,13 @@ function any() { return { type: "any" }; } -function klass(inverted, parts, rawText) { +function klass(inverted, ignoreCase, parts, rawText) { return { - type: "class", - inverted: inverted, - parts: parts, - rawText: rawText + type: "class", + inverted: inverted, + ignoreCase: ignoreCase, + parts: parts, + rawText: rawText }; } @@ -141,7 +142,7 @@ function literalGrammar(literal) { } function classGrammar(inverted, parts, rawText) { - return oneRuleGrammar(klass(inverted, parts, rawText)); + return 
oneRuleGrammar(klass(inverted, false, parts, rawText)); } var anyGrammar = oneRuleGrammar(any()); @@ -417,6 +418,10 @@ test("parses class", function() { "start = [a-de-hi-l]", classGrammar(false, [["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]") ); + parserParses( + "start = [a-d]i", + oneRuleGrammar(klass(false, true, [["a", "d"]], "[a-d]i")) + ); parserParses("start = [a-d]\n", classGrammar(false, [["a", "d"]], "[a-d]")); });