Rewrote implementation of classes to be regexp-based.

redux
David Majda 15 years ago
parent 56ffa94cc7
commit 22d2ac8ac2

@ -82,6 +82,8 @@ PEG.Grammar.Rule.prototype = {
PEG.Grammar.Literal = function(value) { this._value = value; };
PEG.Grammar.Class = function(characters) { this._characters = characters; };
PEG.Grammar.Any = function() {};
PEG.Grammar.Sequence = function(elements) { this._elements = elements; };
@ -111,6 +113,8 @@ PEG.Grammar.Rule.prototype.checkReferencedRulesExist = function(grammar) {
/*
 * Literal, Class and Any nodes have intentionally empty implementations: the
 * |grammar| argument is ignored and nothing is checked (presumably because
 * these nodes cannot reference other rules).
 */
PEG.Grammar.Literal.prototype.checkReferencedRulesExist = function(grammar) {};
PEG.Grammar.Class.prototype.checkReferencedRulesExist = function(grammar) {};
PEG.Grammar.Any.prototype.checkReferencedRulesExist = function(grammar) {};
PEG.Grammar.Sequence.prototype.checkReferencedRulesExist = function(grammar) {
@ -151,6 +155,8 @@ PEG.Grammar.Rule.prototype.checkNoLeftRecursion = function(grammar, appliedRules
/*
 * Literal, Class and Any nodes have intentionally empty implementations: both
 * |grammar| and |appliedRules| are ignored and nothing is checked (presumably
 * because these nodes never apply another rule, so they cannot take part in
 * left recursion).
 */
PEG.Grammar.Literal.prototype.checkNoLeftRecursion = function(grammar, appliedRules) {};
PEG.Grammar.Class.prototype.checkNoLeftRecursion = function(grammar, appliedRules) {};
PEG.Grammar.Any.prototype.checkNoLeftRecursion = function(grammar, appliedRules) {};
PEG.Grammar.Sequence.prototype.checkNoLeftRecursion = function(grammar, appliedRules) {
@ -418,6 +424,25 @@ PEG.Grammar.Literal.prototype.compile = function(resultVar) {
);
};
/*
 * Generates parser code that tries to match one input character against this
 * character class.
 *
 * resultVar - name of the variable declared by the generated code; it receives
 *             the matched character on success and null on failure.
 *
 * Returns the value produced by PEG.Compiler.formatCode for the template
 * below (presumably the generated source text -- confirm against formatCode).
 */
PEG.Grammar.Class.prototype.compile = function(resultVar) {
  return PEG.Compiler.formatCode(
    /*
     * A character class can only ever match a single character, so the
     * generated code tests just the character at the current position instead
     * of building a substring of the whole remaining input on every attempt.
     * |charAt| is also used to read the character because indexing strings
     * with brackets is not available in all ES3 engines. At the end of the
     * input |charAt| yields "" and the regexp does not match.
     */
    "if (${regexp}.test(this._input.charAt(this._pos))) {",
    " var ${resultVar} = this._input.charAt(this._pos);",
    " this._pos++;",
    "} else {",
    " var ${resultVar} = null;",
    " if (context.reportMatchFailures) {",
    " this._matchFailed(new PEG.Parser.ClassMatchFailure(${characters|string}));",
    " }",
    "}",
    {
      characters: this._characters,
      /* |_characters| is spliced in verbatim, so it must already be escaped. */
      regexp: "/^[" + this._characters + "]/",
      resultVar: resultVar
    }
  );
};
PEG.Grammar.Any.prototype.compile = function(resultVar) {
return PEG.Compiler.formatCode(
"if (this._input.length > this._pos) {",

File diff suppressed because it is too large Load Diff

@ -52,15 +52,8 @@ primary
: identifier !(( literal / "") colon) { return new PEG.Grammar.RuleRef($1); }
/ literal { return new PEG.Grammar.Literal($1); }
/ dot { return new PEG.Grammar.Any(); }
/ class {
return new PEG.Grammar.Choice(
PEG.ArrayUtils.map(
$1.split(""),
function(character) { return new PEG.Grammar.Literal(character); }
)
);
}
/ lparen expression rparen { return $2; }
/ class { return new PEG.Grammar.Class($1); }
/ lparen expression rparen { return $2; }
/* "Lexical" elements */
@ -137,22 +130,24 @@ class "character class": "[" (classCharacterRange / classCharacter)* "]" __ {
}
classCharacterRange: bracketDelimitedCharacter "-" bracketDelimitedCharacter {
var beginCharCode = $1.charCodeAt(0);
var endCharCode = $3.charCodeAt(0);
if (beginCharCode > endCharCode) {
if ($1.charCodeAt(0) > $3.charCodeAt(0)) {
throw new PEG.Parser.SyntaxError(
"Invalid character range: " + $1 + "-" + $3 + "."
"Invalid character range: "
+ PEG.RegExpUtils.quoteForClass($1)
+ "-"
+ PEG.RegExpUtils.quoteForClass($3)
+ "."
);
}
var result = "";
for (var charCode = beginCharCode; charCode <= endCharCode; charCode++) {
result += String.fromCharCode(charCode);
}
return result;
return PEG.RegExpUtils.quoteForClass($1)
+ "-"
+ PEG.RegExpUtils.quoteForClass($3);
}
classCharacter: bracketDelimitedCharacter
classCharacter: bracketDelimitedCharacter {
return PEG.RegExpUtils.quoteForClass($1);
}
bracketDelimitedCharacter
: simpleBracketDelimitedCharacter

@ -58,6 +58,29 @@ PEG.StringUtils = {
};
/* ===== PEG.RegExpUtils ===== */
/* RegExp manipulation utility functions. */
PEG.RegExpUtils = {
  /*
   * Escapes characters inside the string so that it can be used as a list of
   * characters in a character class of a regular expression literal, i.e.
   * between "[" and "]" in source text of the form /[...]/.
   *
   * s - string whose characters should be treated literally inside the class
   *
   * Returns the escaped string. Characters without a special meaning are
   * returned unchanged.
   */
  quoteForClass: function(s) {
    /* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
    return s
      /*
       * The backslash must be handled first; otherwise the backslashes
       * introduced by the following replacements would get doubled.
       */
      .replace(/\\/g, '\\\\')        // backslash
      .replace(/\//g, '\\/')         // closing slash
      .replace(/\]/g, '\\]')         // closing bracket
      /*
       * An unescaped caret at the beginning of a class negates it, which
       * would silently invert the meaning of classes such as "[^a]".
       */
      .replace(/\^/g, '\\^')         // caret
      .replace(/-/g, '\\-')          // dash
      .replace(/\r/g, '\\r')         // carriage return
      .replace(/\u2028/g, '\\u2028') // line separator
      .replace(/\u2029/g, '\\u2029') // paragraph separator
      .replace(/\n/g, '\\n');        // line feed
  }
};
/* ===== PEG.Parser ===== */
/* Prototype of all parsers generated by PEG.js. */
@ -211,6 +234,18 @@ PEG.Parser.LiteralMatchFailure.prototype = {
toString: function() { return PEG.StringUtils.quote(this._value); }
};
/* ===== PEG.Parser.ClassMatchFailure ===== */
/* Stores information about a class match failure. */
/*
 * Creates a failure record for a character class that did not match.
 *
 * characters - contents of the class (the text between the brackets)
 */
PEG.Parser.ClassMatchFailure = function(characters) {
  this._characters = characters;
};

PEG.Parser.ClassMatchFailure.prototype = {
  /* Renders the failure as the class wrapped in brackets. */
  toString: function() {
    var classBody = this._characters;

    return "[" + classBody + "]";
  }
};
/* ===== PEG.Parser.AnyMatchFailure ===== */
/* Stores information about a failure to match a "." expression. */

@ -47,12 +47,7 @@ with (PEG.Grammar) {
}
function classGrammar(chars) {
return oneRuleGrammar(new PEG.Grammar.Choice(
PEG.ArrayUtils.map(
chars.split(""),
function(char) { return new PEG.Grammar.Literal(char); }
)
));
return oneRuleGrammar(new PEG.Grammar.Class(chars));
}
var anyGrammar = oneRuleGrammar(new Any());
@ -169,7 +164,7 @@ with (PEG.Grammar) {
grammarParserParses('start: a', identifierGrammar("a"));
grammarParserParses('start: "abcd"', literalGrammar("abcd"));
grammarParserParses('start: .', anyGrammar);
grammarParserParses('start: [a-d]', classGrammar("abcd"));
grammarParserParses('start: [a-d]', classGrammar("a-d"));
grammarParserParses('start: ("abcd")', literalGrammar("abcd"));
});
@ -280,17 +275,17 @@ with (PEG.Grammar) {
/* Canonical class is "[a-d]". */
test("parses classCharacterRange", function() {
grammarParserParses("start: []", classGrammar(""));
grammarParserParses("start: [a-d]", classGrammar("abcd"));
grammarParserParses("start: [a-d]", classGrammar("a-d"));
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [a-de-hi-l]", classGrammar("abcdefghijkl"));
grammarParserParses("start: [a-de-hi-l]", classGrammar("a-de-hi-l"));
grammarParserParses("start: [a-d]\n", classGrammar("abcd"));
grammarParserParses("start: [a-d]\n", classGrammar("a-d"));
});
/* Canonical classCharacterRange is "a-d". */
test("parses classCharacterRange", function() {
grammarParserParses("start: [a-d]", classGrammar("abcd"));
grammarParserParses("start: [a-a]", classGrammar("a"));
grammarParserParses("start: [a-d]", classGrammar("a-d"));
grammarParserParses("start: [a-a]", classGrammar("a-a"));
grammarParserDoesNotParse("start: [b-a]");
});
@ -302,11 +297,11 @@ with (PEG.Grammar) {
/* Canonical bracketDelimitedCharacter is "a". */
test("parses bracketDelimitedCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [\\n]", classGrammar("\n"));
grammarParserParses("start: [\\n]", classGrammar("\\n"));
grammarParserParses("start: [\\0]", classGrammar("\0"));
grammarParserParses("start: [\\x00]", classGrammar("\x00"));
grammarParserParses("start: [\\u0120]", classGrammar("\u0120"));
grammarParserParses("start: [\\\n]", classGrammar("\n"));
grammarParserParses("start: [\\\n]", classGrammar("\\n"));
});
/* Canonical simpleBracketDelimitedCharacter is "a". */

@ -37,4 +37,17 @@ test("quote", function() {
);
});
/* ===== PEG.RegExpUtils ===== */
module("PEG.RegExpUtils");
/*
 * Checks PEG.RegExpUtils.quoteForClass on an empty string, on a string with
 * no special characters, and on a string that contains every escaped
 * character twice (to verify that all occurrences are replaced).
 */
test("quoteForClass", function() {
  /* Pairs of [input, expected output], asserted in this order. */
  var cases = [
    ["", ''],
    ["abcd", 'abcd'],
    [
      "\\/]-\r\u2028\u2029\n\\/]-\r\u2028\u2029\n",
      '\\\\\\/\\]\\-\\r\\u2028\\u2029\\n\\\\\\/\\]\\-\\r\\u2028\\u2029\\n'
    ]
  ];

  for (var i = 0; i < cases.length; i++) {
    strictEqual(PEG.RegExpUtils.quoteForClass(cases[i][0]), cases[i][1]);
  }
});
})();

Loading…
Cancel
Save