Clean up class handling in the metagrammar and compiler

The class AST node now contains structured data and a raw text which is
used for error messages.
redux
David Majda 14 years ago
parent 137a4b4f53
commit 33a1a7c1e9

@ -723,16 +723,23 @@ PEG.Compiler = {
},
"class": function(node, resultVar) {
/*
* Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so we
* translate them into equivalents it can handle.
*/
if (node.chars === "") {
var regexp = "/^(?!)/";
} else if (node.chars === "^") {
var regexp = "/^[\\S\\s]/";
if (node.parts.length > 0) {
var regexp = "/^["
+ (node.inverted ? "^" : "")
+ PEG.ArrayUtils.map(node.parts, function(part) {
return part instanceof Array
? PEG.RegExpUtils.quoteForClass(part[0])
+ "-"
+ PEG.RegExpUtils.quoteForClass(part[1])
: PEG.RegExpUtils.quoteForClass(part);
}).join("")
+ "]/";
} else {
var regexp = "/^[" + node.chars + "]/";
/*
* Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so
* we translate them into equivalents it can handle.
*/
var regexp = node.inverted ? "/^[\\S\\s]/" : "/^(?!)/";
}
return PEG.Compiler.formatCode(
@ -742,12 +749,12 @@ PEG.Compiler = {
"} else {",
" var ${resultVar} = null;",
" if (context.reportMatchFailures) {",
" this._matchFailed('[' + ${chars|string} + ']');",
" this._matchFailed(${rawText|string});",
" }",
"}",
{
chars: node.chars,
regexp: regexp,
rawText: node.rawText,
resultVar: resultVar
}
);

File diff suppressed because it is too large Load Diff

@ -68,7 +68,7 @@ primary
: identifier !(( literal / "") colon) { return { type: "rule_ref", name: $1 }; }
/ literal { return { type: "literal", value: $1 }; }
/ dot { return { type: "any" }; }
/ class { return { type: "class", chars: $1 }; }
/ class
/ lparen expression rparen { return $2; }
/* "Lexical" elements */
@ -142,27 +142,41 @@ singleQuotedCharacter
simpleSingleQuotedCharacter: !("'" / "\\" / eolChar) . { return $2; }
/*
 * Character class such as "[a-d]" or "[^xyz]". Produces a "class" AST node
 * holding the inversion flag, the structured parts and the raw text.
 */
class "character class": "[" "^"? (classCharacterRange / classCharacter)* "]" __ {
return $2 + $3.join("");
// Declared with "var" so the action does not leak implicit globals.
var parts = PEG.ArrayUtils.map($3, function(part) { return part.data; });
var rawText = "["
+ $2
+ PEG.ArrayUtils.map($3, function(part) { return part.rawText; }).join("")
+ "]";
return {
type: "class",
inverted: $2 === "^",
parts: parts,
// FIXME: Get the raw text from the input directly.
rawText: rawText
};
}
/*
 * Character range inside a class, e.g. "a-d". Throws a SyntaxError when the
 * first character's code is greater than the second's. Otherwise yields the
 * pair of characters as data together with the range's raw text.
 */
classCharacterRange: bracketDelimitedCharacter "-" bracketDelimitedCharacter {
if ($1.charCodeAt(0) > $3.charCodeAt(0)) {
classCharacterRange: classCharacter "-" classCharacter {
if ($1.data.charCodeAt(0) > $3.data.charCodeAt(0)) {
throw new this.SyntaxError(
"Invalid character range: "
+ PEG.RegExpUtils.quoteForClass($1)
+ "-"
+ PEG.RegExpUtils.quoteForClass($3)
+ "."
// $2 is the "-" literal; the end of the range is $3.
"Invalid character range: " + $1.rawText + "-" + $3.rawText + "."
);
}
return PEG.RegExpUtils.quoteForClass($1)
+ "-"
+ PEG.RegExpUtils.quoteForClass($3);
return {
data: [$1.data, $3.data],
// FIXME: Get the raw text from the input directly.
rawText: $1.rawText + "-" + $3.rawText
};
}
/*
 * Single character inside a class. The new action returns the matched
 * character as data and its PEG.RegExpUtils.quoteForClass form as raw text.
 */
classCharacter: bracketDelimitedCharacter {
return PEG.RegExpUtils.quoteForClass($1);
return {
data: $1,
// FIXME: Get the raw text from the input directly.
rawText: PEG.RegExpUtils.quoteForClass($1)
};
}
bracketDelimitedCharacter

@ -81,10 +81,12 @@ function any() {
return { type: "any" };
}
/*
 * Builds a "class" AST node for use in test expectations. The new signature
 * takes the inversion flag, the list of parts and the raw text, and stores
 * each of them under the property of the same name.
 */
function klass(chars) {
function klass(inverted, parts, rawText) {
return {
type: "class",
chars: chars
type: "class",
inverted: inverted,
parts: parts,
rawText: rawText
};
}
@ -119,8 +121,8 @@ function literalGrammar(literal) {
return oneRuleGrammar(literal_(literal));
}
/*
 * Wraps a "class" node built by klass() into a grammar via oneRuleGrammar().
 * The new signature forwards its three arguments to klass() unchanged.
 */
function classGrammar(chars) {
return oneRuleGrammar(klass(chars));
function classGrammar(inverted, parts, rawText) {
return oneRuleGrammar(klass(inverted, parts, rawText));
}
var anyGrammar = oneRuleGrammar(any());
@ -224,7 +226,7 @@ test("parses primary", function() {
grammarParserParses('start: a', identifierGrammar("a"));
grammarParserParses('start: "abcd"', literalGrammar("abcd"));
grammarParserParses('start: .', anyGrammar);
grammarParserParses('start: [a-d]', classGrammar("a-d"));
grammarParserParses('start: [a-d]', classGrammar(false, [["a", "d"]], "[a-d]"));
grammarParserParses('start: ("abcd")', literalGrammar("abcd"));
});
@ -334,41 +336,44 @@ test("parses simpleSingleQuotedCharacter", function() {
/* Canonical class is "[a-d]". */
/*
 * Checks the AST produced for character classes: empty, plain, inverted,
 * single-character and multi-range classes, and a class followed by a
 * newline.
 */
test("parses class", function() {
grammarParserParses("start: []", classGrammar(""));
grammarParserParses("start: [a-d]", classGrammar("a-d"));
grammarParserParses("start: [^a-d]", classGrammar("^a-d"));
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [a-de-hi-l]", classGrammar("a-de-hi-l"));
grammarParserParses("start: []", classGrammar(false, [], "[]"));
grammarParserParses("start: [a-d]", classGrammar(false, [["a", "d"]], "[a-d]"));
grammarParserParses("start: [^a-d]", classGrammar(true, [["a", "d"]], "[^a-d]"));
grammarParserParses("start: [a]", classGrammar(false, ["a"], "[a]"));
grammarParserParses(
"start: [a-de-hi-l]",
classGrammar(false, [["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]")
);
grammarParserParses("start: [a-d]\n", classGrammar("a-d"));
// The raw text covers only the bracketed part, not the trailing newline.
grammarParserParses("start: [a-d]\n", classGrammar(false, [["a", "d"]], "[a-d]"));
});
/* Canonical classCharacterRange is "a-d". */
/*
 * Checks character ranges inside a class: an ordinary range, a degenerate
 * single-character range, and rejection of a descending range.
 */
test("parses classCharacterRange", function() {
grammarParserParses("start: [a-d]", classGrammar("a-d"));
grammarParserParses("start: [a-a]", classGrammar("a-a"));
grammarParserParses("start: [a-d]", classGrammar(false, [["a", "d"]], "[a-d]"));
// The raw text must mirror the input class, i.e. "[a-a]" rather than "[a-d]".
grammarParserParses("start: [a-a]", classGrammar(false, [["a", "a"]], "[a-a]"));
grammarParserDoesNotParse("start: [b-a]");
});
/* Canonical classCharacter is "a". */
/* Checks that a single character inside a class becomes a one-element parts list. */
test("parses classCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [a]", classGrammar(false, ["a"], "[a]"));
});
/* Canonical bracketDelimitedCharacter is "a". */
/*
 * Checks plain and escaped characters inside a class. The parts hold the
 * decoded character, while the raw text holds its quoted form. The quoted
 * forms are the values the previous expectations used for the class text.
 */
test("parses bracketDelimitedCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [\\n]", classGrammar("\\n"));
grammarParserParses("start: [\\0]", classGrammar("\\0"));
grammarParserParses("start: [\\x00]", classGrammar("\\0"));
grammarParserParses("start: [\\u0120]", classGrammar("\u0120"));
grammarParserParses("start: [\\\n]", classGrammar("\\n"));
// The parser always sets rawText, so each expectation must supply it too.
grammarParserParses("start: [a]", classGrammar(false, ["a"], "[a]"));
grammarParserParses("start: [\\n]", classGrammar(false, ["\n"], "[\\n]"));
grammarParserParses("start: [\\0]", classGrammar(false, ["\0"], "[\\0]"));
grammarParserParses("start: [\\x00]", classGrammar(false, ["\0"], "[\\0]"));
grammarParserParses("start: [\\u0120]", classGrammar(false, ["\u0120"], "[\u0120]"));
grammarParserParses("start: [\\\n]", classGrammar(false, ["\n"], "[\\n]"));
});
/* Canonical simpleBracketDelimitedCharacter is "a". */
test("parses simpleBracketDelimitedCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [[]", classGrammar("["));
grammarParserParses("start: [a]", classGrammar(false, ["a"]));
grammarParserParses("start: [[]", classGrammar(false, ["["]));
grammarParserDoesNotParse("start: []]");
grammarParserDoesNotParse("start: [\\]");
grammarParserDoesNotParse("start: [\n]");

Loading…
Cancel
Save