Implement case-insensitive class matching

redux
David Majda 13 years ago
parent b540b2d460
commit c04af99df8

@ -136,7 +136,7 @@ Match exactly one character and return it as a string.
#### [*characters*]
Match one character from a set and return it as a string. The characters in the list can be escaped in exactly the same way as in a JavaScript string. The list of characters can also contain ranges (e.g. `[a-z]` means “all lowercase letters”). Preceding the characters with `^` inverts the matched set (e.g. `[^a-z]` means “all characters but lowercase letters”).
Match one character from a set and return it as a string. The characters in the list can be escaped in exactly the same way as in a JavaScript string. The list of characters can also contain ranges (e.g. `[a-z]` means “all lowercase letters”). Preceding the characters with `^` inverts the matched set (e.g. `[^a-z]` means “all characters but lowercase letters”). Appending `i` right after the right bracket makes the match case-insensitive.
#### *rule*

@ -911,7 +911,7 @@ PEG.compiler.emitter = function(ast) {
+ quoteForRegexpClass(part[1])
: quoteForRegexpClass(part);
}).join('')
+ ']/';
+ ']/' + (node.ignoreCase ? 'i' : '');
} else {
/*
* Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so

@ -2272,7 +2272,7 @@ PEG.parser = (function(){
return cachedResult.result;
}
var result0, result1, result2, result3, result4, result5;
var result0, result1, result2, result3, result4, result5, result6;
var pos0, pos1, pos2;
reportFailures++;
@ -2322,9 +2322,24 @@ PEG.parser = (function(){
}
}
if (result3 !== null) {
result4 = parse___();
if (input.charCodeAt(pos) === 105) {
result4 = "i";
pos += 1;
} else {
result4 = null;
if (reportFailures === 0) {
matchFailed("\"i\"");
}
}
result4 = result4 !== null ? result4 : "";
if (result4 !== null) {
result0 = [result0, result1, result2, result3, result4];
result5 = parse___();
if (result5 !== null) {
result0 = [result0, result1, result2, result3, result4, result5];
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
@ -2346,21 +2361,23 @@ PEG.parser = (function(){
pos = pos1;
}
if (result0 !== null) {
result0 = (function(inverted, parts) {
result0 = (function(inverted, parts, flags) {
var partsConverted = map(parts, function(part) { return part.data; });
var rawText = "["
+ inverted
+ map(parts, function(part) { return part.rawText; }).join("")
+ "]";
+ "]"
+ flags;
return {
type: "class",
inverted: inverted === "^",
parts: partsConverted,
type: "class",
inverted: inverted === "^",
ignoreCase: flags === "i",
parts: partsConverted,
// FIXME: Get the raw text from the input directly.
rawText: rawText
rawText: rawText
};
})(result0[1], result0[2]);
})(result0[1], result0[2], result0[4]);
}
if (result0 === null) {
pos = pos0;

@ -235,19 +235,21 @@ simpleSingleQuotedCharacter
= !("'" / "\\" / eolChar) char_:. { return char_; }
class "character class"
= "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" __ {
= "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? __ {
var partsConverted = map(parts, function(part) { return part.data; });
var rawText = "["
+ inverted
+ map(parts, function(part) { return part.rawText; }).join("")
+ "]";
+ "]"
+ flags;
return {
type: "class",
inverted: inverted === "^",
parts: partsConverted,
type: "class",
inverted: inverted === "^",
ignoreCase: flags === "i",
parts: partsConverted,
// FIXME: Get the raw text from the input directly.
rawText: rawText
rawText: rawText
};
}

@ -246,26 +246,62 @@ test("classes", function() {
doesNotParse(emptyClassParser, "a");
doesNotParse(emptyClassParser, "ab");
var nonEmptyClassParser = PEG.buildParser('start = [ab-d]');
parses(nonEmptyClassParser, "a", "a");
parses(nonEmptyClassParser, "b", "b");
parses(nonEmptyClassParser, "c", "c");
parses(nonEmptyClassParser, "d", "d");
doesNotParse(nonEmptyClassParser, "");
doesNotParse(nonEmptyClassParser, "ab");
var invertedEmptyClassParser = PEG.buildParser('start = [^]');
doesNotParse(invertedEmptyClassParser, "");
parses(invertedEmptyClassParser, "a", "a");
doesNotParse(invertedEmptyClassParser, "ab");
var invertedNonEmptyClassParser = PEG.buildParser('start = [^ab-d]');
doesNotParse(invertedNonEmptyClassParser, "a", "a");
doesNotParse(invertedNonEmptyClassParser, "b", "b");
doesNotParse(invertedNonEmptyClassParser, "c", "c");
doesNotParse(invertedNonEmptyClassParser, "d", "d");
doesNotParse(invertedNonEmptyClassParser, "");
doesNotParse(invertedNonEmptyClassParser, "ab");
var nonEmptyCaseSensitiveClassParser = PEG.buildParser('start = [ab-d]');
parses(nonEmptyCaseSensitiveClassParser, "a", "a");
parses(nonEmptyCaseSensitiveClassParser, "b", "b");
parses(nonEmptyCaseSensitiveClassParser, "c", "c");
parses(nonEmptyCaseSensitiveClassParser, "d", "d");
doesNotParse(nonEmptyCaseSensitiveClassParser, "");
doesNotParse(nonEmptyCaseSensitiveClassParser, "A");
doesNotParse(nonEmptyCaseSensitiveClassParser, "B");
doesNotParse(nonEmptyCaseSensitiveClassParser, "C");
doesNotParse(nonEmptyCaseSensitiveClassParser, "D");
doesNotParse(nonEmptyCaseSensitiveClassParser, "e");
doesNotParse(nonEmptyCaseSensitiveClassParser, "ab");
var invertedNonEmptyCaseSensitiveClassParser = PEG.buildParser('start = [^ab-d]');
parses(invertedNonEmptyCaseSensitiveClassParser, "A", "A");
parses(invertedNonEmptyCaseSensitiveClassParser, "B", "B");
parses(invertedNonEmptyCaseSensitiveClassParser, "C", "C");
parses(invertedNonEmptyCaseSensitiveClassParser, "D", "D");
parses(invertedNonEmptyCaseSensitiveClassParser, "e", "e");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "a", "a");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "b", "b");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "c", "c");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "d", "d");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "");
doesNotParse(invertedNonEmptyCaseSensitiveClassParser, "ab");
var nonEmptyCaseInsensitiveClassParser = PEG.buildParser('start = [ab-d]i');
parses(nonEmptyCaseInsensitiveClassParser, "a", "a");
parses(nonEmptyCaseInsensitiveClassParser, "b", "b");
parses(nonEmptyCaseInsensitiveClassParser, "c", "c");
parses(nonEmptyCaseInsensitiveClassParser, "d", "d");
parses(nonEmptyCaseInsensitiveClassParser, "A", "A");
parses(nonEmptyCaseInsensitiveClassParser, "B", "B");
parses(nonEmptyCaseInsensitiveClassParser, "C", "C");
parses(nonEmptyCaseInsensitiveClassParser, "D", "D");
doesNotParse(nonEmptyCaseInsensitiveClassParser, "");
doesNotParse(nonEmptyCaseInsensitiveClassParser, "e");
doesNotParse(nonEmptyCaseInsensitiveClassParser, "ab");
var invertedNonEmptyCaseInsensitiveClassParser = PEG.buildParser('start = [^ab-d]i');
parses(invertedNonEmptyCaseInsensitiveClassParser, "e", "e");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "a", "a");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "b", "b");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "c", "c");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "d", "d");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "A", "A");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "B", "B");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "C", "C");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "D", "D");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "");
doesNotParse(invertedNonEmptyCaseInsensitiveClassParser, "ab");
/*
* Test that the parsing position moves forward after successful parsing of

@ -95,12 +95,13 @@ function any() {
return { type: "any" };
}
function klass(inverted, parts, rawText) {
function klass(inverted, ignoreCase, parts, rawText) {
return {
type: "class",
inverted: inverted,
parts: parts,
rawText: rawText
type: "class",
inverted: inverted,
ignoreCase: ignoreCase,
parts: parts,
rawText: rawText
};
}
@ -141,7 +142,7 @@ function literalGrammar(literal) {
}
function classGrammar(inverted, parts, rawText) {
return oneRuleGrammar(klass(inverted, parts, rawText));
return oneRuleGrammar(klass(inverted, false, parts, rawText));
}
var anyGrammar = oneRuleGrammar(any());
@ -417,6 +418,10 @@ test("parses class", function() {
"start = [a-de-hi-l]",
classGrammar(false, [["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]")
);
parserParses(
"start = [a-d]i",
oneRuleGrammar(klass(false, true, [["a", "d"]], "[a-d]i"))
);
parserParses("start = [a-d]\n", classGrammar(false, [["a", "d"]], "[a-d]"));
});

Loading…
Cancel
Save