Clean up class handling in the metagrammar and compiler

The class AST node now contains structured data and a raw text which is
used for error messages.
redux
David Majda 14 years ago
parent 137a4b4f53
commit 33a1a7c1e9

@ -723,16 +723,23 @@ PEG.Compiler = {
}, },
"class": function(node, resultVar) { "class": function(node, resultVar) {
/* if (node.parts.length > 0) {
* Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so we var regexp = "/^["
* translate them into equivalents it can handle. + (node.inverted ? "^" : "")
*/ + PEG.ArrayUtils.map(node.parts, function(part) {
if (node.chars === "") { return part instanceof Array
var regexp = "/^(?!)/"; ? PEG.RegExpUtils.quoteForClass(part[0])
} else if (node.chars === "^") { + "-"
var regexp = "/^[\\S\\s]/"; + PEG.RegExpUtils.quoteForClass(part[1])
: PEG.RegExpUtils.quoteForClass(part);
}).join("")
+ "]/";
} else { } else {
var regexp = "/^[" + node.chars + "]/"; /*
* Stupid IE considers regexps /[]/ and /[^]/ syntactically invalid, so
* we translate them into equivalents it can handle.
*/
var regexp = node.inverted ? "/^[\\S\\s]/" : "/^(?!)/";
} }
return PEG.Compiler.formatCode( return PEG.Compiler.formatCode(
@ -742,12 +749,12 @@ PEG.Compiler = {
"} else {", "} else {",
" var ${resultVar} = null;", " var ${resultVar} = null;",
" if (context.reportMatchFailures) {", " if (context.reportMatchFailures) {",
" this._matchFailed('[' + ${chars|string} + ']');", " this._matchFailed(${rawText|string});",
" }", " }",
"}", "}",
{ {
chars: node.chars,
regexp: regexp, regexp: regexp,
rawText: node.rawText,
resultVar: resultVar resultVar: resultVar
} }
); );

File diff suppressed because it is too large Load Diff

@ -68,7 +68,7 @@ primary
: identifier !(( literal / "") colon) { return { type: "rule_ref", name: $1 }; } : identifier !(( literal / "") colon) { return { type: "rule_ref", name: $1 }; }
/ literal { return { type: "literal", value: $1 }; } / literal { return { type: "literal", value: $1 }; }
/ dot { return { type: "any" }; } / dot { return { type: "any" }; }
/ class { return { type: "class", chars: $1 }; } / class
/ lparen expression rparen { return $2; } / lparen expression rparen { return $2; }
/* "Lexical" elements */ /* "Lexical" elements */
@ -142,27 +142,41 @@ singleQuotedCharacter
simpleSingleQuotedCharacter: !("'" / "\\" / eolChar) . { return $2; } simpleSingleQuotedCharacter: !("'" / "\\" / eolChar) . { return $2; }
class "character class": "[" "^"? (classCharacterRange / classCharacter)* "]" __ { class "character class": "[" "^"? (classCharacterRange / classCharacter)* "]" __ {
return $2 + $3.join(""); parts = PEG.ArrayUtils.map($3, function(part) { return part.data; });
rawText = "["
+ $2
+ PEG.ArrayUtils.map($3, function(part) { return part.rawText; }).join("")
+ "]";
return {
type: "class",
inverted: $2 === "^",
parts: parts,
// FIXME: Get the raw text from the input directly.
rawText: rawText
};
} }
classCharacterRange: bracketDelimitedCharacter "-" bracketDelimitedCharacter { classCharacterRange: classCharacter "-" classCharacter {
if ($1.charCodeAt(0) > $3.charCodeAt(0)) { if ($1.data.charCodeAt(0) > $3.data.charCodeAt(0)) {
throw new this.SyntaxError( throw new this.SyntaxError(
"Invalid character range: " "Invalid character range: " + $1.rawText + "-" + $2.rawText + "."
+ PEG.RegExpUtils.quoteForClass($1)
+ "-"
+ PEG.RegExpUtils.quoteForClass($3)
+ "."
); );
} }
return PEG.RegExpUtils.quoteForClass($1) return {
+ "-" data: [$1.data, $3.data],
+ PEG.RegExpUtils.quoteForClass($3); // FIXME: Get the raw text from the input directly.
rawText: $1.rawText + "-" + $3.rawText
}
} }
classCharacter: bracketDelimitedCharacter { classCharacter: bracketDelimitedCharacter {
return PEG.RegExpUtils.quoteForClass($1); return {
data: $1,
// FIXME: Get the raw text from the input directly.
rawText: PEG.RegExpUtils.quoteForClass($1)
};
} }
bracketDelimitedCharacter bracketDelimitedCharacter

@ -81,10 +81,12 @@ function any() {
return { type: "any" }; return { type: "any" };
} }
function klass(chars) { function klass(inverted, parts, rawText) {
return { return {
type: "class", type: "class",
chars: chars inverted: inverted,
parts: parts,
rawText: rawText
}; };
} }
@ -119,8 +121,8 @@ function literalGrammar(literal) {
return oneRuleGrammar(literal_(literal)); return oneRuleGrammar(literal_(literal));
} }
function classGrammar(chars) { function classGrammar(inverted, parts, rawText) {
return oneRuleGrammar(klass(chars)); return oneRuleGrammar(klass(inverted, parts, rawText));
} }
var anyGrammar = oneRuleGrammar(any()); var anyGrammar = oneRuleGrammar(any());
@ -224,7 +226,7 @@ test("parses primary", function() {
grammarParserParses('start: a', identifierGrammar("a")); grammarParserParses('start: a', identifierGrammar("a"));
grammarParserParses('start: "abcd"', literalGrammar("abcd")); grammarParserParses('start: "abcd"', literalGrammar("abcd"));
grammarParserParses('start: .', anyGrammar); grammarParserParses('start: .', anyGrammar);
grammarParserParses('start: [a-d]', classGrammar("a-d")); grammarParserParses('start: [a-d]', classGrammar(false, [["a", "d"]], "[a-d]"));
grammarParserParses('start: ("abcd")', literalGrammar("abcd")); grammarParserParses('start: ("abcd")', literalGrammar("abcd"));
}); });
@ -334,41 +336,44 @@ test("parses simpleSingleQuotedCharacter", function() {
/* Canonical class is "[a-d]". */ /* Canonical class is "[a-d]". */
test("parses class", function() { test("parses class", function() {
grammarParserParses("start: []", classGrammar("")); grammarParserParses("start: []", classGrammar(false, [], "[]"));
grammarParserParses("start: [a-d]", classGrammar("a-d")); grammarParserParses("start: [a-d]", classGrammar(false, [["a", "d"]], "[a-d]"));
grammarParserParses("start: [^a-d]", classGrammar("^a-d")); grammarParserParses("start: [^a-d]", classGrammar(true, [["a", "d"]], "[^a-d]"));
grammarParserParses("start: [a]", classGrammar("a")); grammarParserParses("start: [a]", classGrammar(false, ["a"], "[a]"));
grammarParserParses("start: [a-de-hi-l]", classGrammar("a-de-hi-l")); grammarParserParses(
"start: [a-de-hi-l]",
classGrammar(false, [["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]")
);
grammarParserParses("start: [a-d]\n", classGrammar("a-d")); grammarParserParses("start: [a-d]\n", classGrammar(false, [["a", "d"]], "[a-d"]));
}); });
/* Canonical classCharacterRange is "a-d". */ /* Canonical classCharacterRange is "a-d". */
test("parses classCharacterRange", function() { test("parses classCharacterRange", function() {
grammarParserParses("start: [a-d]", classGrammar("a-d")); grammarParserParses("start: [a-d]", classGrammar(false, [["a", "d"]], "[a-d]"));
grammarParserParses("start: [a-a]", classGrammar("a-a")); grammarParserParses("start: [a-a]", classGrammar(false, [["a", "a"]], "[a-d]"));
grammarParserDoesNotParse("start: [b-a]"); grammarParserDoesNotParse("start: [b-a]");
}); });
/* Canonical classCharacter is "a". */ /* Canonical classCharacter is "a". */
test("parses classCharacter", function() { test("parses classCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a")); grammarParserParses("start: [a]", classGrammar(false, ["a"], "[a]"));
}); });
/* Canonical bracketDelimitedCharacter is "a". */ /* Canonical bracketDelimitedCharacter is "a". */
test("parses bracketDelimitedCharacter", function() { test("parses bracketDelimitedCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a")); grammarParserParses("start: [a]", classGrammar(false, ["a"]));
grammarParserParses("start: [\\n]", classGrammar("\\n")); grammarParserParses("start: [\\n]", classGrammar(false, ["\n"]));
grammarParserParses("start: [\\0]", classGrammar("\\0")); grammarParserParses("start: [\\0]", classGrammar(false, ["\0"]));
grammarParserParses("start: [\\x00]", classGrammar("\\0")); grammarParserParses("start: [\\x00]", classGrammar(false, ["\0"]));
grammarParserParses("start: [\\u0120]", classGrammar("\u0120")); grammarParserParses("start: [\\u0120]", classGrammar(false, ["\u0120"]));
grammarParserParses("start: [\\\n]", classGrammar("\\n")); grammarParserParses("start: [\\\n]", classGrammar(false, ["\n"]));
}); });
/* Canonical simpleBracketDelimitedCharacter is "a". */ /* Canonical simpleBracketDelimitedCharacter is "a". */
test("parses simpleBracketDelimitedCharacter", function() { test("parses simpleBracketDelimitedCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a")); grammarParserParses("start: [a]", classGrammar(false, ["a"]));
grammarParserParses("start: [[]", classGrammar("[")); grammarParserParses("start: [[]", classGrammar(false, ["["]));
grammarParserDoesNotParse("start: []]"); grammarParserDoesNotParse("start: []]");
grammarParserDoesNotParse("start: [\\]"); grammarParserDoesNotParse("start: [\\]");
grammarParserDoesNotParse("start: [\n]"); grammarParserDoesNotParse("start: [\n]");

Loading…
Cancel
Save