Implement case-insensitive class matching

redux
David Majda 13 years ago
parent b540b2d460
commit c04af99df8

@ -136,7 +136,7 @@ Match exactly one character and return it as a string.
#### [*characters*] #### [*characters*]
Match one character from a set and return it as a string. The characters in the list can be escaped in exactly the same way as in a JavaScript string. The list of characters can also contain ranges (e.g. `[a-z]` means “all lowercase letters”). Preceding the characters with `^` inverts the matched set (e.g. `[^a-z]` means “all characters but lowercase letters”). Match one character from a set and return it as a string. The characters in the list can be escaped in exactly the same way as in a JavaScript string. The list of characters can also contain ranges (e.g. `[a-z]` means “all lowercase letters”). Preceding the characters with `^` inverts the matched set (e.g. `[^a-z]` means “all characters but lowercase letters”). Appending `i` right after the right bracket makes the match case-insensitive.
#### *rule* #### *rule*

@ -911,7 +911,7 @@ PEG.compiler.emitter = function(ast) {
+ quoteForRegexpClass(part[1]) + quoteForRegexpClass(part[1])
: quoteForRegexpClass(part); : quoteForRegexpClass(part);
}).join('') }).join('')
+ ']/'; + ']/' + (node.ignoreCase ? 'i' : '');
} else { } else {
/* /*
* Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so * Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so

@ -2272,7 +2272,7 @@ PEG.parser = (function(){
return cachedResult.result; return cachedResult.result;
} }
var result0, result1, result2, result3, result4, result5; var result0, result1, result2, result3, result4, result5, result6;
var pos0, pos1, pos2; var pos0, pos1, pos2;
reportFailures++; reportFailures++;
@ -2322,9 +2322,24 @@ PEG.parser = (function(){
} }
} }
if (result3 !== null) { if (result3 !== null) {
result4 = parse___(); if (input.charCodeAt(pos) === 105) {
result4 = "i";
pos += 1;
} else {
result4 = null;
if (reportFailures === 0) {
matchFailed("\"i\"");
}
}
result4 = result4 !== null ? result4 : "";
if (result4 !== null) { if (result4 !== null) {
result0 = [result0, result1, result2, result3, result4]; result5 = parse___();
if (result5 !== null) {
result0 = [result0, result1, result2, result3, result4, result5];
} else {
result0 = null;
pos = pos1;
}
} else { } else {
result0 = null; result0 = null;
pos = pos1; pos = pos1;
@ -2346,21 +2361,23 @@ PEG.parser = (function(){
pos = pos1; pos = pos1;
} }
if (result0 !== null) { if (result0 !== null) {
result0 = (function(inverted, parts) { result0 = (function(inverted, parts, flags) {
var partsConverted = map(parts, function(part) { return part.data; }); var partsConverted = map(parts, function(part) { return part.data; });
var rawText = "[" var rawText = "["
+ inverted + inverted
+ map(parts, function(part) { return part.rawText; }).join("") + map(parts, function(part) { return part.rawText; }).join("")
+ "]"; + "]"
+ flags;
return { return {
type: "class", type: "class",
inverted: inverted === "^", inverted: inverted === "^",
parts: partsConverted, ignoreCase: flags === "i",
parts: partsConverted,
// FIXME: Get the raw text from the input directly. // FIXME: Get the raw text from the input directly.
rawText: rawText rawText: rawText
}; };
})(result0[1], result0[2]); })(result0[1], result0[2], result0[4]);
} }
if (result0 === null) { if (result0 === null) {
pos = pos0; pos = pos0;

@ -235,19 +235,21 @@ simpleSingleQuotedCharacter
= !("'" / "\\" / eolChar) char_:. { return char_; } = !("'" / "\\" / eolChar) char_:. { return char_; }
class "character class" class "character class"
= "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" __ { = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? __ {
var partsConverted = map(parts, function(part) { return part.data; }); var partsConverted = map(parts, function(part) { return part.data; });
var rawText = "[" var rawText = "["
+ inverted + inverted
+ map(parts, function(part) { return part.rawText; }).join("") + map(parts, function(part) { return part.rawText; }).join("")
+ "]"; + "]"
+ flags;
return { return {
type: "class", type: "class",
inverted: inverted === "^", inverted: inverted === "^",
parts: partsConverted, ignoreCase: flags === "i",
parts: partsConverted,
// FIXME: Get the raw text from the input directly. // FIXME: Get the raw text from the input directly.
rawText: rawText rawText: rawText
}; };
} }

@ -246,26 +246,62 @@ test("classes", function() {
doesNotParse(emptyClassParser, "a"); doesNotParse(emptyClassParser, "a");
doesNotParse(emptyClassParser, "ab"); doesNotParse(emptyClassParser, "ab");
var nonEmptyClassParser = PEG.buildParser('start = [ab-d]');
parses(nonEmptyClassParser, "a", "a");
parses(nonEmptyClassParser, "b", "b");
parses(nonEmptyClassParser, "c", "c");
parses(nonEmptyClassParser, "d", "d");
doesNotParse(nonEmptyClassParser, "");
doesNotParse(nonEmptyClassParser, "ab");
var invertedEmptyClassParser = PEG.buildParser('start = [^]'); var invertedEmptyClassParser = PEG.buildParser('start = [^]');
doesNotParse(invertedEmptyClassParser, ""); doesNotParse(invertedEmptyClassParser, "");
parses(invertedEmptyClassParser, "a", "a"); parses(invertedEmptyClassParser, "a", "a");
doesNotParse(invertedEmptyClassParser, "ab"); doesNotParse(invertedEmptyClassParser, "ab");
var invertedNonEmptyClassParser = PEG.buildParser('start = [^ab-d]'); var nonEmptyCaseSensitiveClassParser = PEG.buildParser('start = [ab-d]');
doesNotParse(invertedNonEmptyClassParser, "a", "a"); parses(nonEmptyCaseSensitiveClassParser, "a", "a");
doesNotParse(invertedNonEmptyClassParser, "b", "b"); parses(nonEmptyCaseSensitiveClassParser, "b", "b");
doesNotParse(invertedNonEmptyClassParser, "c", "c"); parses(nonEmptyCaseSensitiveClassParser, "c", "c");
doesNotParse(invertedNonEmptyClassParser, "d", "d"); parses(nonEmptyCaseSensitiveClassParser, "d", "d");
doesNotParse(invertedNonEmptyClassParser, ""); doesNotParse(nonEmptyCaseSensitiveClassParser, "");
doesNotParse(invertedNonEmptyClassParser, "ab"); doesNotParse(nonEmptyCaseSensitiveClassParser, "A");
doesNotParse(nonEmptyCaseSensitiveClassParser, "B");
doesNotParse(nonEmptyCaseSensitiveClassParser, "C");
doesNotParse(nonEmptyCaseSensitiveClassParser, "D");
doesNotParse(nonEmptyCaseSensitiveClassParser, "e");
doesNotParse(nonEmptyCaseSensitiveClassParser, "ab");
var invertedNonEmptyCaseSensitiveClassParser = PEG.buildParser('start = [^ab-d]');
parses(invertedNonEmptyCaseSensitiveClassParser, "A", "A");
parses(invertedNonEmptyCaseSensitiveClassParser, "B", "B");
parses(invertedNonEmptyCaseSensitiveClassParser, "C", "C");
parses(invertedNonEmptyCaseSensitiveClassParser, "D", "D");
parses(invertedNonEmptyCaseSensitiveClassParser, "e", "e");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "a", "a");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "b", "b");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "c", "c");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "d", "d");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "ab");
var nonEmptyCaseInsensitiveClassParser = PEG.buildParser('start = [ab-d]i');
parses(nonEmptyCaseInsensitiveClassParser, "a", "a");
parses(nonEmptyCaseInsensitiveClassParser, "b", "b");
parses(nonEmptyCaseInsensitiveClassParser, "c", "c");
parses(nonEmptyCaseInsensitiveClassParser, "d", "d");
parses(nonEmptyCaseInsensitiveClassParser, "A", "A");
parses(nonEmptyCaseInsensitiveClassParser, "B", "B");
parses(nonEmptyCaseInsensitiveClassParser, "C", "C");
parses(nonEmptyCaseInsensitiveClassParser, "D", "D");
doesNotParse(nonEmptyCaseInsensitiveClassParser, "");
doesNotParse(nonEmptyCaseInsensitiveClassParser, "e");
doesNotParse(nonEmptyCaseInsensitiveClassParser, "ab");
var invertedNonEmptyCaseInsensitiveClassParser = PEG.buildParser('start = [^ab-d]i');
parses(invertedNonEmptyCaseInsensitiveClassParser, "e", "e");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "a", "a");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "b", "b");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "c", "c");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "d", "d");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "A", "A");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "B", "B");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "C", "C");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "D", "D");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "ab");
/* /*
* Test that the parsing position moves forward after successful parsing of * Test that the parsing position moves forward after successful parsing of

@ -95,12 +95,13 @@ function any() {
return { type: "any" }; return { type: "any" };
} }
function klass(inverted, parts, rawText) { function klass(inverted, ignoreCase, parts, rawText) {
return { return {
type: "class", type: "class",
inverted: inverted, inverted: inverted,
parts: parts, ignoreCase: ignoreCase,
rawText: rawText parts: parts,
rawText: rawText
}; };
} }
@ -141,7 +142,7 @@ function literalGrammar(literal) {
} }
function classGrammar(inverted, parts, rawText) { function classGrammar(inverted, parts, rawText) {
return oneRuleGrammar(klass(inverted, parts, rawText)); return oneRuleGrammar(klass(inverted, false, parts, rawText));
} }
var anyGrammar = oneRuleGrammar(any()); var anyGrammar = oneRuleGrammar(any());
@ -417,6 +418,10 @@ test("parses class", function() {
"start = [a-de-hi-l]", "start = [a-de-hi-l]",
classGrammar(false, [["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]") classGrammar(false, [["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]")
); );
parserParses(
"start = [a-d]i",
oneRuleGrammar(klass(false, true, [["a", "d"]], "[a-d]i"))
);
parserParses("start = [a-d]\n", classGrammar(false, [["a", "d"]], "[a-d]")); parserParses("start = [a-d]\n", classGrammar(false, [["a", "d"]], "[a-d]"));
}); });

Loading…
Cancel
Save