Browse Source

Bootstrapped the grammar parser, yay! I should have done this long ago.

redux
David Majda 12 years ago
parent
commit
a43d1b33e3
  1. 10
      Rakefile
  2. 1
      bin/pegjs-main.js
  3. 860
      lib/compiler.js
  4. 4625
      lib/metagrammar.js
  5. 217
      lib/metagrammar.pegjs
  6. 453
      test/compiler-test.js
  7. 2
      test/index.html
  8. 460
      test/metagrammar-test.js

10
Rakefile

@ -21,4 +21,12 @@ file "lib/pegjs-runtime-#{version}.min.js" => "lib/runtime.js" do |t|
File.open(t.name, "w") { |f| f.write(response.body) }
end
task :default => "lib/pegjs-runtime-#{version}.min.js"
# File task for lib/metagrammar.js: re-runs the pegjs command-line tool on the
# metagrammar source whenever lib/metagrammar.pegjs is newer than the target.
file "lib/metagrammar.js" => "lib/metagrammar.pegjs" do |t|
  # Rake's `sh` (unlike Kernel#system, whose false/nil result was ignored here)
  # raises when the command exits with a non-zero status, so a failed parser
  # generation aborts the build instead of passing silently.
  sh "bin/pegjs --start grammar PEG.grammarParser lib/metagrammar.pegjs"
end
# Named task that depends on the minified runtime file target.
desc "Build the minified parser runtime"
task :minify => "lib/pegjs-runtime-#{version}.min.js"
# Named task that depends on the generated grammar parser file target.
desc "Generate the grammar parser"
task :metaparser => "lib/metagrammar.js"

1
bin/pegjs-main.js

@ -7,6 +7,7 @@ importPackage(java.lang);
*/
/*
 * Load the PEG.js libraries. Paths are built relative to arguments[0]
 * (presumably the directory of the launcher script -- TODO confirm).
 */
load(arguments[0] + "/../lib/runtime.js");
load(arguments[0] + "/../lib/compiler.js");
load(arguments[0] + "/../lib/metagrammar.js");
/*
 * Special file names standing for the standard streams (presumably following
 * the usual "-" command-line convention -- verify against their uses).
 */
var FILE_STDIN = "-";
var FILE_STDOUT = "-";

860
lib/compiler.js

@ -518,864 +518,4 @@ PEG.Grammar.Action.prototype.compile = function(resultVar) {
);
};
/* ===== PEG.grammarParser ===== */
/* Shared semantic actions used by the rules of the grammar parser. */

/* Returns the first argument unchanged. */
var returnFirstArg = function(first) {
  return first;
};

/* Returns the second argument unchanged. */
var returnSecondArg = function(first, second) {
  return second;
};

/* Returns the items of the second argument (an array) joined into a string. */
var returnSecondArgJoined = function(first, second) {
  return second.join("");
};

/* Returns the first argument followed by the joined items of the second one. */
var returnFirstArgAndSecondArgJoined = function(first, second) {
  var joined = second.join("");
  return first + joined;
};
/*
 * Builds the rule |name| for a single-character token: the literal |ch|
 * followed by a reference to the "__" rule. The rule has no human-readable
 * name and its action returns the first element of the matched sequence, i.e.
 * the literal itself.
 *
 * The grammar classes are qualified explicitly instead of being brought into
 * scope by a |with| statement, which is a syntax error in strict mode and makes
 * name lookup depend on whatever properties PEG.Grammar has at run time.
 */
function characterRule(name, ch) {
  var Grammar = PEG.Grammar;

  return new Grammar.Rule(
    name,
    null,
    new Grammar.Action(
      new Grammar.Sequence([new Grammar.Literal(ch), new Grammar.RuleRef("__")]),
      returnFirstArg
    )
  );
}
/* Bootstrapping is really badly needed. */
/*
 * Builds PEG.grammarParser, the parser of PEG grammars, from a hand-written
 * tree of PEG.Grammar nodes. The object passed to PEG.buildParser maps rule
 * names to rules; "grammar" is passed as the second argument (presumably the
 * name of the start rule -- confirm against PEG.buildParser).
 *
 * The grammar classes are aliased inside a function scope instead of being
 * brought into scope by a |with| statement, which is a syntax error in strict
 * mode and makes name lookup depend on the run-time contents of PEG.Grammar.
 *
 * NOTE(review): the action functions deliberately keep the fully qualified
 * PEG.Grammar.* names of the original code. Presumably actions may be
 * evaluated outside this scope -- confirm before making them use the local
 * aliases or helpers.
 */
(function(Grammar) {
  var Rule = Grammar.Rule;
  var Action = Grammar.Action;
  var Sequence = Grammar.Sequence;
  var Choice = Grammar.Choice;
  var ZeroOrMore = Grammar.ZeroOrMore;
  var NotPredicate = Grammar.NotPredicate;
  var RuleRef = Grammar.RuleRef;
  var Literal = Grammar.Literal;
  var Any = Grammar.Any;

  /*
   * Returns a choice between single-character literals, one literal per
   * character of |characters|, in the order given.
   */
  function literalChoice(characters) {
    var literals = [];
    for (var i = 0; i < characters.length; i++) {
      literals.push(new Literal(characters.charAt(i)));
    }
    return new Choice(literals);
  }

  /*
   * Returns the rule |name| matching one character inside a delimited token:
   * either the rule |simpleName| or one of the escape sequence rules.
   */
  function delimitedCharacterRule(name, simpleName) {
    return new Rule(
      name,
      null,
      new Choice([
        new RuleRef(simpleName),
        new RuleRef("simpleEscapeSequence"),
        new RuleRef("zeroEscapeSequence"),
        new RuleRef("hexEscapeSequence"),
        new RuleRef("unicodeEscapeSequence"),
        new RuleRef("eolEscapeSequence")
      ])
    );
  }

  /*
   * Returns the rule |name| matching any single character except |delimiter|,
   * a backslash and the end-of-line characters.
   */
  function simpleCharacterRule(name, delimiter) {
    return new Rule(
      name,
      null,
      new Action(
        new Sequence([
          new NotPredicate(
            new Choice([
              new Literal(delimiter),
              new Literal("\\"),
              new RuleRef("eolChar")
            ])
          ),
          new Any()
        ]),
        returnSecondArg
      )
    );
  }

  PEG.grammarParser = PEG.buildParser({
    grammar:
      new Rule(
        "grammar",
        null,
        new Action(
          new Sequence([
            new RuleRef("__"),
            new RuleRef("rule"),
            new ZeroOrMore(new RuleRef("rule"))
          ]),
          function(dummy, first, rest) {
            var rules = [first].concat(rest);
            var result = {};
            for (var i = 0; i < rules.length; i++) {
              result[rules[i].getName()] = rules[i];
            }
            return result;
          }
        )
      ),

    rule:
      new Rule(
        "rule",
        null,
        new Action(
          new Sequence([
            new RuleRef("identifier"),
            new Choice([new RuleRef("literal"), new Literal("")]),
            new RuleRef("colon"),
            new RuleRef("expression")
          ]),
          function(name, humanName, dummy, expression) {
            return new PEG.Grammar.Rule(
              name,
              humanName !== "" ? humanName : null,
              expression
            );
          }
        )
      ),

    expression:
      new Rule("expression", null, new RuleRef("choice")),

    choice:
      new Rule(
        "choice",
        null,
        new Action(
          new Sequence([
            new RuleRef("sequence"),
            new ZeroOrMore(
              new Sequence([new RuleRef("slash"), new RuleRef("sequence")])
            )
          ]),
          function(first, rest) {
            return rest.length > 0
              ? new PEG.Grammar.Choice([first].concat(PEG.ArrayUtils.map(
                  rest,
                  function(element) { return element[1]; }
                )))
              : first;
          }
        )
      ),

    sequence:
      new Rule(
        "sequence",
        null,
        new Choice([
          new Action(
            new Sequence([
              new ZeroOrMore(new RuleRef("prefixed")),
              new RuleRef("action")
            ]),
            function(expressions, action) {
              return new PEG.Grammar.Action(
                expressions.length != 1
                  ? new PEG.Grammar.Sequence(expressions)
                  : expressions[0],
                action
              );
            }
          ),
          new Action(
            new ZeroOrMore(new RuleRef("prefixed")),
            function(expressions) {
              return expressions.length != 1
                ? new PEG.Grammar.Sequence(expressions)
                : expressions[0];
            }
          )
        ])
      ),

    prefixed:
      new Rule(
        "prefixed",
        null,
        new Choice([
          new Action(
            new Sequence([new RuleRef("and"), new RuleRef("suffixed")]),
            function(dummy, expression) {
              return new PEG.Grammar.NotPredicate(
                new PEG.Grammar.NotPredicate(expression)
              );
            }
          ),
          new Action(
            new Sequence([new RuleRef("not"), new RuleRef("suffixed")]),
            function(dummy, expression) {
              return new PEG.Grammar.NotPredicate(expression);
            }
          ),
          new RuleRef("suffixed")
        ])
      ),

    suffixed:
      new Rule(
        "suffixed",
        null,
        new Choice([
          new Action(
            new Sequence([new RuleRef("primary"), new RuleRef("question")]),
            function(expression) {
              return new PEG.Grammar.Choice([
                expression,
                new PEG.Grammar.Literal("")
              ]);
            }
          ),
          new Action(
            new Sequence([new RuleRef("primary"), new RuleRef("star")]),
            function(expression) { return new PEG.Grammar.ZeroOrMore(expression); }
          ),
          new Action(
            new Sequence([new RuleRef("primary"), new RuleRef("plus")]),
            function(expression) {
              return new PEG.Grammar.Action(
                new PEG.Grammar.Sequence([
                  expression,
                  new PEG.Grammar.ZeroOrMore(expression)
                ]),
                function(first, rest) { return [first].concat(rest); }
              );
            }
          ),
          new RuleRef("primary")
        ])
      ),

    primary:
      new Rule(
        "primary",
        null,
        new Choice([
          new Action(
            new Sequence([
              new RuleRef("identifier"),
              new NotPredicate(
                new Sequence([
                  new Choice([new RuleRef("literal"), new Literal("")]),
                  new RuleRef("colon")
                ])
              )
            ]),
            function(identifier) { return new PEG.Grammar.RuleRef(identifier); }
          ),
          new Action(
            new RuleRef("literal"),
            function(literal) { return new PEG.Grammar.Literal(literal); }
          ),
          new Action(
            new RuleRef("dot"),
            function() { return new PEG.Grammar.Any(); }
          ),
          new Action(
            new RuleRef("class"),
            function(characters) {
              return new PEG.Grammar.Choice(
                PEG.ArrayUtils.map(
                  characters.split(""),
                  function(character) {
                    return new PEG.Grammar.Literal(character);
                  }
                )
              );
            }
          ),
          new Action(
            new Sequence([
              new RuleRef("lparen"),
              new RuleRef("expression"),
              new RuleRef("rparen")
            ]),
            returnSecondArg
          )
        ])
      ),

    /* "Lexical" elements */

    action:
      new Rule(
        "action",
        "action",
        new Action(
          new Sequence([new RuleRef("braced"), new RuleRef("__")]),
          function(braced) { return braced.substr(1, braced.length - 2); }
        )
      ),

    braced:
      new Rule(
        "braced",
        null,
        new Action(
          new Sequence([
            new Literal("{"),
            new ZeroOrMore(
              new Choice([
                new RuleRef("braced"),
                new RuleRef("nonBraceCharacters")
              ])
            ),
            new Literal("}")
          ]),
          function(leftBrace, parts, rightBrace) {
            return leftBrace + parts.join("") + rightBrace;
          }
        )
      ),

    nonBraceCharacters:
      new Rule(
        "nonBraceCharacters",
        null,
        new Action(
          new Sequence([
            new RuleRef("nonBraceCharacter"),
            new ZeroOrMore(new RuleRef("nonBraceCharacter"))
          ]),
          returnFirstArgAndSecondArgJoined
        )
      ),

    nonBraceCharacter:
      new Rule(
        "nonBraceCharacter",
        null,
        new Action(
          new Sequence([
            new NotPredicate(new Choice([new Literal("{"), new Literal("}")])),
            new Any()
          ]),
          returnSecondArg
        )
      ),

    colon:    characterRule("colon", ":"),
    slash:    characterRule("slash", "/"),
    and:      characterRule("and", "&"),
    not:      characterRule("not", "!"),
    question: characterRule("question", "?"),
    star:     characterRule("star", "*"),
    plus:     characterRule("plus", "+"),
    lparen:   characterRule("lparen", "("),
    rparen:   characterRule("rparen", ")"),
    dot:      characterRule("dot", "."),

    /*
     * Modelled after ECMA-262, 5th ed., 7.6, but much simplified:
     *
     * * no Unicode escape sequences
     *
     * * "Unicode combining marks" and "Unicode connection punctuation" can't
     *   be part of the identifier
     *
     * * only [a-zA-Z] is considered a "Unicode letter"
     *
     * * only [0-9] is considered a "Unicode digit"
     *
     * The simplifications were made just to make the implementation a little
     * bit easier, there is no "philosophical" reason behind them.
     */
    identifier:
      new Rule(
        "identifier",
        "identifier",
        new Action(
          new Sequence([
            new Choice([
              new RuleRef("letter"),
              new Literal("_"),
              new Literal("$")
            ]),
            new ZeroOrMore(
              new Choice([
                new RuleRef("letter"),
                new RuleRef("digit"),
                new Literal("_"),
                new Literal("$")
              ])
            ),
            new RuleRef("__")
          ]),
          returnFirstArgAndSecondArgJoined
        )
      ),

    /*
     * Modelled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
     * vaguely).
     */
    literal:
      new Rule(
        "literal",
        "literal",
        new Action(
          new Sequence([
            new Choice([
              new RuleRef("doubleQuotedLiteral"),
              new RuleRef("singleQuotedLiteral")
            ]),
            new RuleRef("__")
          ]),
          returnFirstArg
        )
      ),

    doubleQuotedLiteral:
      new Rule(
        "doubleQuotedLiteral",
        null,
        new Action(
          new Sequence([
            new Literal('"'),
            new ZeroOrMore(new RuleRef("doubleQuotedCharacter")),
            new Literal('"')
          ]),
          returnSecondArgJoined
        )
      ),

    doubleQuotedCharacter:
      delimitedCharacterRule(
        "doubleQuotedCharacter",
        "simpleDoubleQuotedCharacter"
      ),

    simpleDoubleQuotedCharacter:
      simpleCharacterRule("simpleDoubleQuotedCharacter", '"'),

    singleQuotedLiteral:
      new Rule(
        "singleQuotedLiteral",
        null,
        new Action(
          new Sequence([
            new Literal("'"),
            new ZeroOrMore(new RuleRef("singleQuotedCharacter")),
            new Literal("'")
          ]),
          returnSecondArgJoined
        )
      ),

    singleQuotedCharacter:
      delimitedCharacterRule(
        "singleQuotedCharacter",
        "simpleSingleQuotedCharacter"
      ),

    simpleSingleQuotedCharacter:
      simpleCharacterRule("simpleSingleQuotedCharacter", "'"),

    "class":
      new Rule(
        "class",
        "character class",
        new Action(
          new Sequence([
            new Literal("["),
            new ZeroOrMore(
              new Choice([
                new RuleRef("classCharacterRange"),
                new RuleRef("classCharacter")
              ])
            ),
            new Literal("]"),
            new RuleRef("__")
          ]),
          returnSecondArgJoined
        )
      ),

    classCharacterRange:
      new Rule(
        "classCharacterRange",
        null,
        new Action(
          new Sequence([
            new RuleRef("bracketDelimitedCharacter"),
            new Literal("-"),
            new RuleRef("bracketDelimitedCharacter")
          ]),
          function(begin, dummy2, end) {
            var beginCharCode = begin.charCodeAt(0);
            var endCharCode = end.charCodeAt(0);
            if (beginCharCode > endCharCode) {
              throw new PEG.Parser.SyntaxError(
                "Invalid character range: " + begin + "-" + end + "."
              );
            }
            var result = "";
            for (var charCode = beginCharCode; charCode <= endCharCode; charCode++) {
              result += String.fromCharCode(charCode);
            }
            return result;
          }
        )
      ),

    classCharacter:
      new Rule("classCharacter", null, new RuleRef("bracketDelimitedCharacter")),

    bracketDelimitedCharacter:
      delimitedCharacterRule(
        "bracketDelimitedCharacter",
        "simpleBracketDelimitedCharacter"
      ),

    simpleBracketDelimitedCharacter:
      simpleCharacterRule("simpleBracketDelimitedCharacter", "]"),

    simpleEscapeSequence:
      new Rule(
        "simpleEscapeSequence",
        null,
        new Action(
          new Sequence([
            new Literal("\\"),
            new NotPredicate(
              new Choice([
                new RuleRef("digit"),
                new Literal("x"),
                new Literal("u"),
                new RuleRef("eolChar")
              ])
            ),
            new Any()
          ]),
          function(dummy1, dummy2, character) {
            return character
              .replace("b", "\b")
              .replace("f", "\f")
              .replace("n", "\n")
              .replace("r", "\r")
              .replace("t", "\t")
              .replace("v", "\v");
          }
        )
      ),

    zeroEscapeSequence:
      new Rule(
        "zeroEscapeSequence",
        null,
        new Action(
          new Sequence([
            new Literal("\\0"),
            new NotPredicate(new RuleRef("digit"))
          ]),
          function() { return "\0"; }
        )
      ),

    hexEscapeSequence:
      new Rule(
        "hexEscapeSequence",
        null,
        new Action(
          new Sequence([
            new Literal("\\x"),
            new RuleRef("hexDigit"),
            new RuleRef("hexDigit")
          ]),
          function(dummy, digit1, digit2) {
            return String.fromCharCode(parseInt(digit1 + digit2, 16));
          }
        )
      ),

    unicodeEscapeSequence:
      new Rule(
        "unicodeEscapeSequence",
        null,
        new Action(
          new Sequence([
            new Literal("\\u"),
            new RuleRef("hexDigit"),
            new RuleRef("hexDigit"),
            new RuleRef("hexDigit"),
            new RuleRef("hexDigit")
          ]),
          function(dummy, digit1, digit2, digit3, digit4) {
            return String.fromCharCode(parseInt(
              digit1 + digit2 + digit3 + digit4,
              16
            ));
          }
        )
      ),

    eolEscapeSequence:
      new Rule(
        "eolEscapeSequence",
        null,
        new Action(
          new Sequence([new Literal("\\"), new RuleRef("eol")]),
          returnSecondArg
        )
      ),

    digit:
      new Rule("digit", null, literalChoice("0123456789")),

    hexDigit:
      new Rule("hexDigit", null, literalChoice("0123456789abcdefABCDEF")),

    letter:
      new Rule(
        "letter",
        null,
        new Choice([
          new RuleRef("lowerCaseLetter"),
          new RuleRef("upperCaseLetter")
        ])
      ),

    lowerCaseLetter:
      new Rule(
        "lowerCaseLetter",
        null,
        literalChoice("abcdefghijklmnopqrstuvwxyz")
      ),

    upperCaseLetter:
      new Rule(
        "upperCaseLetter",
        null,
        literalChoice("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
      ),

    __:
      new Rule(
        "__",
        null,
        new ZeroOrMore(
          new Choice([
            new RuleRef("whitespace"),
            new RuleRef("eol"),
            new RuleRef("comment")
          ])
        )
      ),

    /* Modelled after ECMA-262, 5th ed., 7.4. */
    comment:
      new Rule(
        "comment",
        "comment",
        new Choice([
          new RuleRef("singleLineComment"),
          new RuleRef("multiLineComment")
        ])
      ),

    singleLineComment:
      new Rule(
        "singleLineComment",
        null,
        new Sequence([
          new Literal("//"),
          new ZeroOrMore(
            new Sequence([new NotPredicate(new RuleRef("eolChar")), new Any()])
          )
        ])
      ),

    multiLineComment:
      new Rule(
        "multiLineComment",
        null,
        new Sequence([
          new Literal("/*"),
          new ZeroOrMore(
            new Sequence([new NotPredicate(new Literal("*/")), new Any()])
          ),
          new Literal("*/")
        ])
      ),

    /* Modelled after ECMA-262, 5th ed., 7.3. */
    eol:
      new Rule(
        "eol",
        "end of line",
        new Choice([
          new Literal("\n"),
          new Literal("\r\n"),
          new Literal("\r"),
          new Literal("\u2028"),
          new Literal("\u2029")
        ])
      ),

    eolChar:
      new Rule("eolChar", null, literalChoice("\n\r\u2028\u2029")),

    /*
     * Modelled after ECMA-262, 5th ed., 7.2. "\uFEFF" should follow "\xA0" in
     * the list, but it causes an infinite loop in Rhino.
     */
    whitespace:
      new Rule(
        "whitespace",
        "whitespace",
        literalChoice(
          " \t\v\f\xA0"
            + "\u1680\u180E"
            + "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A"
            + "\u202F\u205F\u3000"
        )
      )
  }, "grammar");
})(PEG.Grammar);
})();

4625
lib/metagrammar.js
File diff suppressed because it is too large
View File

217
lib/metagrammar.pegjs

@ -0,0 +1,217 @@
/*
 * PEG.js metagrammar: the grammar of PEG.js grammars themselves. Its actions
 * build the PEG.Grammar.* node tree; $1, $2, ... refer to the results of the
 * corresponding elements of the matched sequence.
 *
 * The generated parser has to be rebuilt after any change to this file.
 */

grammar: __ rule+ {
  var result = {};
  for (var i = 0; i < $2.length; i++) { result[$2[i].getName()] = $2[i]; }
  return result;
}

rule: identifier (literal / "") colon expression {
  return new PEG.Grammar.Rule($1, $2 !== "" ? $2 : null, $4);
}

expression: choice

choice: sequence (slash sequence)* {
  return $2.length > 0
    ? new PEG.Grammar.Choice([$1].concat(PEG.ArrayUtils.map(
        $2,
        function(element) { return element[1]; }
      )))
    : $1;
}

sequence
  : prefixed* action {
      return new PEG.Grammar.Action(
        $1.length != 1 ? new PEG.Grammar.Sequence($1) : $1[0],
        $2
      );
    }
  / prefixed* { return $1.length != 1 ? new PEG.Grammar.Sequence($1) : $1[0]; }

prefixed
  : and suffixed {
      return new PEG.Grammar.NotPredicate(new PEG.Grammar.NotPredicate($2));
    }
  / not suffixed { return new PEG.Grammar.NotPredicate($2); }
  / suffixed

suffixed
  : primary question {
      return new PEG.Grammar.Choice([$1, new PEG.Grammar.Literal("")]);
    }
  / primary star { return new PEG.Grammar.ZeroOrMore($1); }
  / primary plus {
      return new PEG.Grammar.Action(
        new PEG.Grammar.Sequence([$1, new PEG.Grammar.ZeroOrMore($1)]),
        function(first, rest) { return [first].concat(rest); }
      );
    }
  / primary

primary
  : identifier !((literal / "") colon) { return new PEG.Grammar.RuleRef($1); }
  / literal { return new PEG.Grammar.Literal($1); }
  / dot { return new PEG.Grammar.Any(); }
  / class {
      return new PEG.Grammar.Choice(
        PEG.ArrayUtils.map(
          $1.split(""),
          function(character) { return new PEG.Grammar.Literal(character); }
        )
      );
    }
  / lparen expression rparen { return $2; }

/* "Lexical" elements */

action "action": braced __ { return $1.substr(1, $1.length - 2); }

/*
 * Runs of ordinary characters are matched through nonBraceCharacters, so that
 * rule is actually used; the joined result is the same as when the characters
 * are matched one by one.
 */
braced: "{" (braced / nonBraceCharacters)* "}" { return $1 + $2.join("") + $3; }

nonBraceCharacters: nonBraceCharacter+ { return $1.join(""); }

nonBraceCharacter: !("{" / "}") . { return $2; }

colon:    ":" __ { return $1; }
slash:    "/" __ { return $1; }
and:      "&" __ { return $1; }
not:      "!" __ { return $1; }
question: "?" __ { return $1; }
star:     "*" __ { return $1; }
plus:     "+" __ { return $1; }
lparen:   "(" __ { return $1; }
rparen:   ")" __ { return $1; }
dot:      "." __ { return $1; }

/*
 * Modelled after ECMA-262, 5th ed., 7.6, but much simplified:
 *
 * * no Unicode escape sequences
 *
 * * "Unicode combining marks" and "Unicode connection punctuation" can't be
 *   part of the identifier
 *
 * * only [a-zA-Z] is considered a "Unicode letter"
 *
 * * only [0-9] is considered a "Unicode digit"
 *
 * The simplifications were made just to make the implementation a little bit
 * easier, there is no "philosophical" reason behind them.
 */
identifier "identifier": (letter / "_" / "$") (letter / digit / "_" / "$")* __ {
  return $1 + $2.join("");
}

/*
 * Modelled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
 * vaguely).
 */
literal "literal": (doubleQuotedLiteral / singleQuotedLiteral) __ { return $1; }

doubleQuotedLiteral: '"' doubleQuotedCharacter* '"' { return $2.join(""); }

doubleQuotedCharacter
  : simpleDoubleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

simpleDoubleQuotedCharacter: !('"' / "\\" / eolChar) . { return $2; }

singleQuotedLiteral: "'" singleQuotedCharacter* "'" { return $2.join(""); }

singleQuotedCharacter
  : simpleSingleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

simpleSingleQuotedCharacter: !("'" / "\\" / eolChar) . { return $2; }

class "character class": "[" (classCharacterRange / classCharacter)* "]" __ {
  return $2.join("");
}

classCharacterRange: bracketDelimitedCharacter "-" bracketDelimitedCharacter {
  var beginCharCode = $1.charCodeAt(0);
  var endCharCode = $3.charCodeAt(0);
  if (beginCharCode > endCharCode) {
    throw new PEG.Parser.SyntaxError(
      "Invalid character range: " + $1 + "-" + $3 + "."
    );
  }
  var result = "";
  for (var charCode = beginCharCode; charCode <= endCharCode; charCode++) {
    result += String.fromCharCode(charCode);
  }
  return result;
}

classCharacter: bracketDelimitedCharacter

bracketDelimitedCharacter
  : simpleBracketDelimitedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

simpleBracketDelimitedCharacter: !("]" / "\\" / eolChar) . { return $2; }

simpleEscapeSequence: "\\" !(digit / "x" / "u" / eolChar) . {
  return $3
    .replace("b", "\b")
    .replace("f", "\f")
    .replace("n", "\n")
    .replace("r", "\r")
    .replace("t", "\t")
    .replace("v", "\v");
}

zeroEscapeSequence: "\\0" !digit { return "\0"; }

hexEscapeSequence: "\\x" hexDigit hexDigit {
  return String.fromCharCode(parseInt($2 + $3, 16));
}

unicodeEscapeSequence: "\\u" hexDigit hexDigit hexDigit hexDigit {
  return String.fromCharCode(parseInt($2 + $3 + $4 + $5, 16));
}

eolEscapeSequence: "\\" eol { return $2; }

digit: [0-9]

hexDigit: [0-9a-fA-F]

letter: lowerCaseLetter / upperCaseLetter

lowerCaseLetter: [a-z]

upperCaseLetter: [A-Z]

__: (whitespace / eol / comment)*

/* Modelled after ECMA-262, 5th ed., 7.4. */
comment "comment": singleLineComment / multiLineComment

singleLineComment: "//" (!eolChar .)*

multiLineComment: "/*" (!"*/" .)* "*/"

/* Modelled after ECMA-262, 5th ed., 7.3. */
eol "end of line": "\n" / "\r\n" / "\r" / "\u2028" / "\u2029"

eolChar: [\n\r\u2028\u2029]

/*
 * Modelled after ECMA-262, 5th ed., 7.2. \uFEFF should be among the
 * characters too, but it causes an infinite loop in Rhino.
 */
whitespace "whitespace": [ \t\v\f\xA0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]

453
test/compiler-test.js

@ -57,14 +57,6 @@ global.doesNotParseWithPos = function(parser, input, line, column) {
}
};
/*
 * Shorthand for global.parses with PEG.grammarParser as the parser under
 * test; |input| and |expected| are passed through unchanged.
 */
global.grammarParserParses = function(input, expected) {
  var parser = PEG.grammarParser;

  global.parses(parser, input, expected);
};
/*
 * Shorthand for global.doesNotParse with PEG.grammarParser as the parser under
 * test; |input| is passed through unchanged.
 */
global.grammarParserDoesNotParse = function(input) {
  global.doesNotParse(PEG.grammarParser, input);
};
/* ===== PEG.Compiler ===== */
module("PEG.Compiler");
@ -526,449 +518,4 @@ test("nested comments", function() {
);
});
/* ===== Grammar Parser ===== */
module("Grammar Parser");
with (PEG.Grammar) {
var literalEmpty = new Literal("");
var literalAbcd = new Literal("abcd");
var literalEfgh = new Literal("efgh");
var literalIjkl = new Literal("ijkl");
var choice = new Choice([literalAbcd, literalEmpty]);
var notAbcd = new NotPredicate(literalAbcd);
var notEfgh = new NotPredicate(literalEfgh);
var notIjkl = new NotPredicate(literalIjkl);
var sequenceEmpty = new Sequence([]);
var sequenceNots = new Sequence([notAbcd, notEfgh, notIjkl]);
var sequenceLiterals = new Sequence([literalAbcd, literalEfgh, literalIjkl]);
function oneRuleGrammar(expression) {
return { start: new PEG.Grammar.Rule("start", null, expression) };
}
var simpleGrammar = oneRuleGrammar(new Literal("abcd"));
function identifierGrammar(identifier) {
return oneRuleGrammar(new PEG.Grammar.RuleRef(identifier));
}
function literalGrammar(literal) {
return oneRuleGrammar(new PEG.Grammar.Literal(literal));
}
function classGrammar(chars) {
return oneRuleGrammar(new PEG.Grammar.Choice(
PEG.ArrayUtils.map(
chars.split(""),
function(char) { return new PEG.Grammar.Literal(char); }
)
));
}
var anyGrammar = oneRuleGrammar(new Any());
function actionGrammar(action) {
return oneRuleGrammar(new PEG.Grammar.Action(new PEG.Grammar.Literal("a"), action));
}
/* Canonical grammar is "a: \"abcd\";\nb: \"efgh\";\nc: \"ijkl\";". */
test("parses grammar", function() {
grammarParserParses('a: "abcd"', { a: new Rule("a", null, literalAbcd) });
grammarParserParses(
'a: "abcd"\nb: "efgh"\nc: "ijkl"',
{
a: new Rule("a", null, literalAbcd),
b: new Rule("b", null, literalEfgh),
c: new Rule("c", null, literalIjkl)
}
);
});
/* Canonical rule is "a: \"abcd\"". */
test("parses rule", function() {
grammarParserParses(
'start: "abcd" / "efgh" / "ijkl"',
oneRuleGrammar(new Choice([literalAbcd, literalEfgh, literalIjkl]))
);
grammarParserParses(
'start "start rule": "abcd" / "efgh" / "ijkl"',
{
start:
new Rule(
"start",
"start rule",
new Choice([literalAbcd, literalEfgh, literalIjkl])
)
}
);
});
/* Canonical expression is "\"abcd\" / \"efgh\" / \"ijkl\"". */
test("parses expression", function() {
grammarParserParses(
'start: "abcd" / "efgh" / "ijkl"',
oneRuleGrammar(new Choice([literalAbcd, literalEfgh, literalIjkl]))
);
});
/* Canonical choice is "\"abcd\" / \"efgh\" / \"ijkl\"". */
test("parses choice", function() {
grammarParserParses(
'start: "abcd" "efgh" "ijkl"',
oneRuleGrammar(sequenceLiterals)
);
grammarParserParses(
'start: "abcd" "efgh" "ijkl" / "abcd" "efgh" "ijkl" / "abcd" "efgh" "ijkl"',
oneRuleGrammar(new Choice([
sequenceLiterals,
sequenceLiterals,
sequenceLiterals
]))
);
});
/* Canonical sequence is "\"abcd\" \"efgh\" \"ijkl\"". */
test("parses sequence", function() {
grammarParserParses(
'start: { code }',
oneRuleGrammar(new Action(sequenceEmpty, " code "))
);
grammarParserParses(
'start: !"abcd" { code }',
oneRuleGrammar(new Action(notAbcd, " code "))
);
grammarParserParses(
'start: !"abcd" !"efgh" !"ijkl" { code }',
oneRuleGrammar(new Action(sequenceNots, " code "))
);
grammarParserParses('start: ', oneRuleGrammar(sequenceEmpty));
grammarParserParses('start: !"abcd"', oneRuleGrammar(notAbcd));
grammarParserParses(
'start: !"abcd" !"efgh" !"ijkl"',
oneRuleGrammar(sequenceNots)
);
});
/* Canonical prefixed is "!\"abcd\"". */
test("parses prefixed", function() {
grammarParserParses(
'start: &"abcd"?',
oneRuleGrammar(new NotPredicate(new NotPredicate(choice)))
);
grammarParserParses('start: !"abcd"?', oneRuleGrammar(new NotPredicate(choice)));
grammarParserParses('start: "abcd"?', oneRuleGrammar(choice));
});
/* Canonical suffixed is "\"abcd\"?". */
test("parses suffixed", function() {
grammarParserParses('start: "abcd"?', oneRuleGrammar(choice));
grammarParserParses('start: "abcd"*', oneRuleGrammar(new ZeroOrMore(literalAbcd)));
grammarParserParses(
'start: "abcd"+',
oneRuleGrammar(new Action(
new Sequence([literalAbcd, new ZeroOrMore(literalAbcd)]),
function(first, rest) { return [first].concat(rest); }
))
);
grammarParserParses('start: "abcd"', literalGrammar("abcd"));
});
/* Canonical primary is "\"abcd\"". */
test("parses primary", function() {
grammarParserParses('start: a', identifierGrammar("a"));
grammarParserParses('start: "abcd"', literalGrammar("abcd"));
grammarParserParses('start: .', anyGrammar);
grammarParserParses('start: [a-d]', classGrammar("abcd"));
grammarParserParses('start: ("abcd")', literalGrammar("abcd"));
});
/* Canonical action is "{ code }". */
test("parses action", function() {
grammarParserParses('start: "a" { code }', actionGrammar(" code "));
});
/* Canonical braced is "{ code }". */
test("parses braced", function() {
grammarParserParses('start: "a" {}', actionGrammar(""));
grammarParserParses('start: "a" {a}', actionGrammar("a"));
grammarParserParses('start: "a" {{a}}', actionGrammar("{a}"));
grammarParserParses('start: "a" {aaa}', actionGrammar("aaa"));
});
/* Trivial character rules are not tested. */
/* Canonical identifier is "a". */
test("parses identifier", function() {
grammarParserParses('start: a', identifierGrammar("a"));
grammarParserParses('start: z', identifierGrammar("z"));
grammarParserParses('start: A', identifierGrammar("A"));
grammarParserParses('start: Z', identifierGrammar("Z"));
grammarParserParses('start: _', identifierGrammar("_"));
grammarParserParses('start: $', identifierGrammar("$"));
grammarParserParses('start: aa', identifierGrammar("aa"));
grammarParserParses('start: az', identifierGrammar("az"));
grammarParserParses('start: aA', identifierGrammar("aA"));
grammarParserParses('start: aZ', identifierGrammar("aZ"));
grammarParserParses('start: a0', identifierGrammar("a0"));
grammarParserParses('start: a9', identifierGrammar("a9"));
grammarParserParses('start: a_', identifierGrammar("a_"));
grammarParserParses('start: a$', identifierGrammar("a$"));
grammarParserParses('start: abcd', identifierGrammar("abcd"));
grammarParserParses('start: a\n', identifierGrammar("a"));
});
/* Canonical literal is "\"abcd\"". */
test("parses literal", function() {
grammarParserParses('start: "abcd"', literalGrammar("abcd"));
grammarParserParses("start: 'abcd'", literalGrammar("abcd"));
});
/* Canonical doubleQuotedLiteral is "\"abcd\"". */
test("parses doubleQuotedLiteral", function() {
grammarParserParses('start: ""', literalGrammar(""));
grammarParserParses('start: "a"', literalGrammar("a"));
grammarParserParses('start: "abc"', literalGrammar("abc"));
grammarParserParses('start: "abcd"\n', literalGrammar("abcd"));
});
/* Canonical doubleQuotedCharacter is "a". */
test("parses doubleQuotedCharacter", function() {
grammarParserParses('start: "a"', literalGrammar("a"));
grammarParserParses('start: "\\n"', literalGrammar("\n"));
grammarParserParses('start: "\\0"', literalGrammar("\0"));
grammarParserParses('start: "\\x00"', literalGrammar("\x00"));
grammarParserParses('start: "\\u0120"', literalGrammar("\u0120"));
grammarParserParses('start: "\\\n"', literalGrammar("\n"));
});
/* Canonical simpleDoubleQuotedCharacter is "a". */
test("parses simpleDoubleQuotedCharacter", function() {
grammarParserParses('start: "a"', literalGrammar("a"));
grammarParserParses('start: "\'"', literalGrammar("'"));
grammarParserDoesNotParse('start: """');
grammarParserDoesNotParse('start: "\\"');
grammarParserDoesNotParse('start: "\n"');
grammarParserDoesNotParse('start: "\r"');
grammarParserDoesNotParse('start: "\u2028"');
grammarParserDoesNotParse('start: "\u2029"');
});
/* Canonical singleQuotedLiteral is "'abcd'". */
test("parses singleQuotedLiteral", function() {
grammarParserParses("start: ''", literalGrammar(""));
grammarParserParses("start: 'a'", literalGrammar("a"));
grammarParserParses("start: 'abc'", literalGrammar("abc"));
grammarParserParses("start: 'abcd'\n", literalGrammar("abcd"));
});
/* Canonical singleQuotedCharacter is "a". */
test("parses singleQuotedCharacter", function() {
grammarParserParses("start: 'a'", literalGrammar("a"));
grammarParserParses("start: '\\n'", literalGrammar("\n"));
grammarParserParses("start: '\\0'", literalGrammar("\0"));
grammarParserParses("start: '\\x00'", literalGrammar("\x00"));
grammarParserParses("start: '\\u0120'", literalGrammar("\u0120"));
grammarParserParses("start: '\\\n'", literalGrammar("\n"));
});
/* Canonical simpleSingleQuotedCharacter is "a". */
test("parses simpleSingleQuotedCharacter", function() {
grammarParserParses("start: 'a'", literalGrammar("a"));
grammarParserParses("start: '\"'", literalGrammar("\""));
grammarParserDoesNotParse("start: '''");
grammarParserDoesNotParse("start: '\\'");
grammarParserDoesNotParse("start: '\n'");
grammarParserDoesNotParse("start: '\r'");
grammarParserDoesNotParse("start: '\u2028'");
grammarParserDoesNotParse("start: '\u2029'");
});
/* Canonical class is "[a-d]". */
test("parses classCharacterRange", function() {
grammarParserParses("start: []", classGrammar(""));
grammarParserParses("start: [a-d]", classGrammar("abcd"));
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [a-de-hi-l]", classGrammar("abcdefghijkl"));
grammarParserParses("start: [a-d]\n", classGrammar("abcd"));
});
/* Canonical classCharacterRange is "a-d". */
test("parses classCharacterRange", function() {
grammarParserParses("start: [a-d]", classGrammar("abcd"));
grammarParserParses("start: [a-a]", classGrammar("a"));
grammarParserDoesNotParse("start: [b-a]");
});
/* Canonical classCharacter is "a". */
test("parses classCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a"));
});
/* Canonical bracketDelimitedCharacter is "a". */
test("parses bracketDelimitedCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [\\n]", classGrammar("\n"));
grammarParserParses("start: [\\0]", classGrammar("\0"));
grammarParserParses("start: [\\x00]", classGrammar("\x00"));
grammarParserParses("start: [\\u0120]", classGrammar("\u0120"));
grammarParserParses("start: [\\\n]", classGrammar("\n"));
});
/* Canonical simpleBracketDelimitedCharacter is "a". */
test("parses simpleBracketDelimitedCharacter", function() {
grammarParserParses("start: [a]", classGrammar("a"));
grammarParserParses("start: [[]", classGrammar("["));
grammarParserDoesNotParse("start: []]");
grammarParserDoesNotParse("start: [\\]");
grammarParserDoesNotParse("start: [\n]");
grammarParserDoesNotParse("start: [\r]");
grammarParserDoesNotParse("start: [\u2028]");
grammarParserDoesNotParse("start: [\u2029]");
});
/* Canonical simpleEscapeSequence is "\\n". */
test("parses simpleEscapeSequence", function() {
  /*
   * Each entry is [character written after the backslash, character the
   * literal is expected to contain]. The last entry shows that an escaped
   * character with no special meaning stands for itself.
   */
  var escapes = [
    ["'",  "'"],
    ['"',  "\""],
    ["\\", "\\"],
    ["b",  "\b"],
    ["f",  "\f"],
    ["n",  "\n"],
    ["r",  "\r"],
    ["t",  "\t"],
    ["v",  "\v"],
    ["a",  "a"]
  ];

  for (var i = 0; i < escapes.length; i++) {
    grammarParserParses(
      'start: "\\' + escapes[i][0] + '"',
      literalGrammar(escapes[i][1])
    );
  }
});
/* Canonical zeroEscapeSequence is "\\0". */
test("parses zeroEscapeSequence", function() {
  /* "\0" immediately followed by a decimal digit is expected to fail. */
  var rejected = ['start: "\\00"', 'start: "\\09"'];

  grammarParserParses('start: "\\0"', literalGrammar("\0"));

  for (var i = 0; i < rejected.length; i++) {
    grammarParserDoesNotParse(rejected[i]);
  }
});
/* Canonical hexEscapeSequence is "\\x00". */
test("parses hexEscapeSequence", function() {
  /*
   * Each entry is [two hex digits written after "\x", expected character].
   * Decimal digits plus lower- and upper-case hex letters are covered.
   */
  var twoDigit = [
    ["00", "\x00"],
    ["09", "\x09"],
    ["0a", "\x0a"],
    ["0f", "\x0f"],
    ["0A", "\x0A"],
    ["0F", "\x0F"]
  ];

  for (var i = 0; i < twoDigit.length; i++) {
    grammarParserParses(
      'start: "\\x' + twoDigit[i][0] + '"',
      literalGrammar(twoDigit[i][1])
    );
  }

  /* A single hex digit is not enough. */
  grammarParserDoesNotParse('start: "\\x0"');
  /* A third digit is an ordinary character following the escape. */
  grammarParserParses('start: "\\x000"', literalGrammar("\x000"));
});
/* Canonical unicodeEscapeSequence is "\\u0120". */
test("parses unicodeEscapeSequence", function() {
  /*
   * Each entry is [four hex digits written after "\u", expected character].
   * Decimal digits plus lower- and upper-case hex letters are covered.
   */
  var fourDigit = [
    ["0120", "\u0120"],
    ["0129", "\u0129"],
    ["012a", "\u012a"],
    ["012f", "\u012f"],
    ["012A", "\u012A"],
    ["012F", "\u012F"]
  ];

  for (var i = 0; i < fourDigit.length; i++) {
    grammarParserParses(
      'start: "\\u' + fourDigit[i][0] + '"',
      literalGrammar(fourDigit[i][1])
    );
  }

  /* Three hex digits are not enough. */
  grammarParserDoesNotParse('start: "\\u012"');
  /* A fifth digit is an ordinary character following the escape. */
  grammarParserParses('start: "\\u01234"', literalGrammar("\u01234"));
});
/* Canonical eolEscapeSequence is "\\\n". */
test("parses eolEscapeSequence", function() {
  /*
   * A backslash followed by a real line terminator inside a literal is
   * expected to produce that line terminator itself.
   */
  var terminators = ["\n", "\r\n", "\r", "\u2028", "\u2029"];

  for (var i = 0; i < terminators.length; i++) {
    grammarParserParses(
      'start: "\\' + terminators[i] + '"',
      literalGrammar(terminators[i])
    );
  }
});
/* Canonical __ is "\n". */
test("parses __", function() {
grammarParserParses('start:"abcd"', simpleGrammar);
grammarParserParses('start: "abcd"', simpleGrammar);
grammarParserParses('start:\n"abcd"', simpleGrammar);
grammarParserParses('start:/* comment */"abcd"', simpleGrammar);
grammarParserParses('start: "abcd"', simpleGrammar);
});
/* Trivial character class rules are not tested. */
/* Canonical comment is "\/* comment *\/". */
test("parses comment", function() {
  /* One single-line and one multi-line comment, placed after the colon. */
  var sources = [
    'start:// comment\n"abcd"',
    'start:/* comment */"abcd"'
  ];

  for (var i = 0; i < sources.length; i++) {
    grammarParserParses(sources[i], simpleGrammar);
  }
});
/* Canonical singleLineComment is "// comment". */
test("parses singleLineComment", function() {
  /* Comment bodies of zero, one and several characters, ended by "\n". */
  var bodies = ["", "a", "aaa"];

  for (var i = 0; i < bodies.length; i++) {
    grammarParserParses('start://' + bodies[i] + '\n"abcd"', simpleGrammar);
  }

  /* A comment that runs to the end of input without a newline. */
  grammarParserParses('start: "abcd"//', simpleGrammar);
});
/* Canonical multiLineComment is "\/* comment *\/". */
test("parses multiLineComment", function() {
  /*
   * Comment bodies that are expected to be accepted: empty, one character,
   * several characters, a newline, a lone asterisk and a body ending in a
   * slash.
   */
  var bodies = ["", "a", "aaa", "\n", "*", "a/"];

  /*
   * Inputs that are expected to be rejected: an unterminated comment, an
   * opener whose asterisk would have to double as the closer, and a nested
   * comment.
   */
  var rejected = [
    'start:/*"abcd"',
    'start:/*/"abcd"',
    'start:/*/**/*/"abcd"'
  ];

  var i;

  for (i = 0; i < bodies.length; i++) {
    grammarParserParses('start:/*' + bodies[i] + '*/"abcd"', simpleGrammar);
  }

  for (i = 0; i < rejected.length; i++) {
    grammarParserDoesNotParse(rejected[i]);
  }
});
/* Canonical eol is "\n". */
test("parses eol", function() {
  /* Line endings tried between the colon and the expression. */
  var lineEndings = ["\n", "\r\n", "\r", "\u2028", "\u2029"];

  for (var i = 0; i < lineEndings.length; i++) {
    grammarParserParses('start:' + lineEndings[i] + '"abcd"', simpleGrammar);
  }
});
/* Canonical eolChar is "\n". */
test("parses eolChar", function() {
  /* Single line-terminator characters tried after the colon. */
  var lineChars = ["\n", "\r", "\u2028", "\u2029"];

  for (var i = 0; i < lineChars.length; i++) {
    grammarParserParses('start:' + lineChars[i] + '"abcd"', simpleGrammar);
  }
});
/* Canonical whitespace is " ". */
test("parses whitespace", function() {
  /*
   * Every character below is placed, one at a time, between the colon and the
   * expression; each input is expected to produce simpleGrammar.
   */
  var blanks = [
    "\t", "\v", "\f", " ", "\xA0",
    "\u1680", "\u180E",
    "\u2000", "\u2001", "\u2002", "\u2003", "\u2004", "\u2005",
    "\u2006", "\u2007", "\u2008", "\u2009", "\u200A",
    "\u202F", "\u205F", "\u3000"
  ];

  for (var i = 0; i < blanks.length; i++) {
    grammarParserParses('start:' + blanks[i] + '"abcd"', simpleGrammar);
  }
});
}
})();

2
test/index.html

@ -7,8 +7,10 @@
<script src="../vendor/qunit/qunit.js"></script>
<script src="../lib/runtime.js"></script>
<script src="../lib/compiler.js"></script>
<script src="../lib/metagrammar.js"></script>
<script src="runtime-test.js"></script>
<script src="compiler-test.js"></script>
<script src="metagrammar-test.js"></script>
</head>
<body>
<h1 id="qunit-header">PEG.js Test Suite</h1>

460
test/metagrammar-test.js

@ -0,0 +1,460 @@
(function() {
var global = this;
/* ===== Helpers ===== */
/*
 * Test helper: forwards to global.parses with PEG.grammarParser as the parser
 * under test, passing the input and the expected result through unchanged.
 */
global.grammarParserParses = function(input, expected) {
  /* Looked up on every call so the parser current at call time is used. */
  var parser = PEG.grammarParser;

  global.parses(parser, input, expected);
};
/*
 * Test helper: forwards to global.doesNotParse with PEG.grammarParser as the
 * parser under test, passing the input through unchanged.
 */
global.grammarParserDoesNotParse = function(input) {
  global.doesNotParse(PEG.grammarParser, input);
};
/* ===== Grammar Parser ===== */
module("Grammar Parser");
with (PEG.Grammar) {
var literalEmpty = new Literal("");
var literalAbcd = new Literal("abcd");
var literalEfgh = new Literal("efgh");
var literalIjkl = new Literal("ijkl");
var choice = new Choice([literalAbcd, literalEmpty]);
var notAbcd = new NotPredicate(literalAbcd);
var notEfgh = new NotPredicate(literalEfgh);
var notIjkl = new NotPredicate(literalIjkl);
var sequenceEmpty = new