PEG.js grammar: More JavaScript-like rules for strings/literals/classes

redux
David Majda 10 years ago
parent bcb5271649
commit 0d6b91cb20

File diff suppressed because it is too large Load Diff

@ -428,16 +428,16 @@ describe("PEG.js grammar parser", function() {
expect('start = abcd').toParseAs(ruleRefGrammar("abcd")); expect('start = abcd').toParseAs(ruleRefGrammar("abcd"));
}); });
/* Canonical Literal is "\"abcd\"". */ /* Trivial character class rules are not tested. */
it("parses Literal", function() {
expect('start = "abcd"' ).toParseAs(literalGrammar("abcd"));
expect("start = 'abcd'" ).toParseAs(literalGrammar("abcd"));
/* Canonical LiteralMatcher is "\"abcd\"". */
it("parses LiteralMatcher", function() {
expect('start = "abcd"' ).toParseAs(literalGrammar("abcd"));
expect('start = "abcd"i').toParseAs(literalGrammar("abcd", true)); expect('start = "abcd"i').toParseAs(literalGrammar("abcd", true));
}); });
/* Canonical String is "\"abcd\"". */ /* Canonical StringLiteral is "\"abcd\"". */
it("parses String", function() { it("parses StringLiteral", function() {
var grammar = oneRuleGrammar({ var grammar = oneRuleGrammar({
type: "named", type: "named",
name: "abcd", name: "abcd",
@ -448,67 +448,42 @@ describe("PEG.js grammar parser", function() {
expect('start \'abcd\' = "abcd"').toParseAs(grammar); expect('start \'abcd\' = "abcd"').toParseAs(grammar);
}); });
/* Canonical DoubleQuotedString is "\"abcd\"". */ /* Canonical DoubleStringCharacter is "a". */
it("parses DoubleQuotedString", function() { it("parses DoubleStringCharacter", function() {
expect('start = ""' ).toParseAs(literalGrammar("")); expect('start = "a"' ).toParseAs(literalGrammar("a"));
expect('start = "a"' ).toParseAs(literalGrammar("a")); expect('start = "\\n"' ).toParseAs(literalGrammar("\n"));
expect('start = "abc"').toParseAs(literalGrammar("abc")); expect('start = "\\\n"').toParseAs(literalGrammar(""));
});
/* Canonical DoubleQuotedCharacter is "a". */
it("parses DoubleQuotedCharacter", function() {
expect('start = "a"' ).toParseAs(literalGrammar("a"));
expect('start = "\\n"' ).toParseAs(literalGrammar("\n"));
expect('start = "\\0"' ).toParseAs(literalGrammar("\x00"));
expect('start = "\\xFF"' ).toParseAs(literalGrammar("\xFF"));
expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF"));
expect('start = "\\\n"' ).toParseAs(literalGrammar(""));
});
/* Canonical SimpleDoubleQuotedCharacter is "a". */
it("parses SimpleDoubleQuotedCharacter", function() {
expect('start = "a"').toParseAs(literalGrammar("a"));
expect('start = """' ).toFailToParse(); expect('start = """' ).toFailToParse();
expect('start = "\\"').toFailToParse(); expect('start = "\\"').toFailToParse();
expect('start = "\n"').toFailToParse(); expect('start = "\n"').toFailToParse();
}); });
/* Canonical SingleQuotedString is "'abcd'". */ /* Canonical SingleStringCharacter is "a". */
it("parses SingleQuotedString", function() { it("parses SingleStringCharacter", function() {
expect("start = ''" ).toParseAs(literalGrammar("")); expect("start = 'a'" ).toParseAs(literalGrammar("a"));
expect("start = 'a'" ).toParseAs(literalGrammar("a")); expect("start = '\\n'" ).toParseAs(literalGrammar("\n"));
expect("start = 'abc'").toParseAs(literalGrammar("abc")); expect("start = '\\\n'").toParseAs(literalGrammar(""));
});
/* Canonical SingleQuotedCharacter is "a". */
it("parses SingleQuotedCharacter", function() {
expect("start = 'a'" ).toParseAs(literalGrammar("a"));
expect("start = '\\n'" ).toParseAs(literalGrammar("\n"));
expect("start = '\\0'" ).toParseAs(literalGrammar("\x00"));
expect("start = '\\xFF'" ).toParseAs(literalGrammar("\xFF"));
expect("start = '\\uFFFF'").toParseAs(literalGrammar("\uFFFF"));
expect("start = '\\\n'" ).toParseAs(literalGrammar(""));
});
/* Canonical SimpleSingleQuotedCharacter is "a". */
it("parses SimpleSingleQuotedCharacter", function() {
expect("start = 'a'").toParseAs(literalGrammar("a"));
expect("start = '''" ).toFailToParse(); expect("start = '''" ).toFailToParse();
expect("start = '\\'").toFailToParse(); expect("start = '\\'").toFailToParse();
expect("start = '\n'").toFailToParse(); expect("start = '\n'").toFailToParse();
}); });
/* Canonical Class is "[a-d]". */ /* Canonical CharacterClassMatcher is "[a-d]". */
it("parses Class", function() { it("parses CharacterClassMatcher", function() {
expect('start = []' ).toParseAs(classGrammar([], "[]")); expect('start = []').toParseAs(
expect('start = [a-d]' ).toParseAs(classGrammar([["a", "d"]], "[a-d]")); classGrammar([], "[]")
expect('start = [a]' ).toParseAs(classGrammar(["a"], "[a]")); );
expect('start = [a-d]').toParseAs(
classGrammar([["a", "d"]], "[a-d]")
);
expect('start = [a]').toParseAs(
classGrammar(["a"], "[a]")
);
expect('start = [a-de-hi-l]').toParseAs( expect('start = [a-de-hi-l]').toParseAs(
classGrammar([["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]") classGrammar([["a", "d"], ["e", "h"], ["i", "l"]], "[a-de-hi-l]")
); );
expect('start = [^a-d]').toParseAs( expect('start = [^a-d]').toParseAs(
classGrammar([["a", "d"]], "[^a-d]", true, false) classGrammar([["a", "d"]], "[^a-d]", true, false)
); );
@ -520,8 +495,8 @@ describe("PEG.js grammar parser", function() {
/* Canonical ClassCharacterRange is "a-d". */ /* Canonical ClassCharacterRange is "a-d". */
it("parses ClassCharacterRange", function() { it("parses ClassCharacterRange", function() {
expect('start = [a-d]').toParseAs(classGrammar([["a", "d"]], "[a-d]")); expect('start = [a-d]').toParseAs(classGrammar([["a", "d"]], "[a-d]"));
expect('start = [a-a]').toParseAs(classGrammar([["a", "a"]], "[a-a]"));
expect('start = [a-a]').toParseAs(classGrammar([["a", "a"]], "[a-a]"));
expect('start = [b-a]').toFailToParse({ expect('start = [b-a]').toFailToParse({
message: "Invalid character range: b-a." message: "Invalid character range: b-a."
}); });
@ -529,67 +504,67 @@ describe("PEG.js grammar parser", function() {
/* Canonical ClassCharacter is "a". */ /* Canonical ClassCharacter is "a". */
it("parses ClassCharacter", function() { it("parses ClassCharacter", function() {
expect('start = [a]').toParseAs(classGrammar(["a"], "[a]")); expect('start = [a]' ).toParseAs(classGrammar(["a"], "[a]"));
expect('start = [\\n]' ).toParseAs(classGrammar(["\n"], "[\\n]"));
expect('start = [\\\n]').toParseAs(classGrammar([''], "[\\\n]"));
expect('start = []]' ).toFailToParse();
expect('start = [\\]').toFailToParse();
expect('start = [\n]').toFailToParse();
}); });
/* Canonical BracketDelimitedCharacter is "a". */ /* Canonical LineContinuation is "\\\n". */
it("parses BracketDelimitedCharacter", function() { it("parses LineContinuation", function() {
expect('start = [a]' ).toParseAs(classGrammar(["a"], "[a]")); expect('start = "\\\r\n"').toParseAs(literalGrammar(""));
expect('start = [\\n]' ).toParseAs(classGrammar(["\n"], "[\\n]"));
expect('start = [\\0]' ).toParseAs(classGrammar(["\x00"], "[\\0]"));
expect('start = [\\xFF]' ).toParseAs(classGrammar(["\xFF"], "[\\xFF]"));
expect('start = [\\uFFFF]').toParseAs(classGrammar(["\uFFFF"], "[\\uFFFF]"));
expect('start = [\\\n]' ).toParseAs(classGrammar([""], "[\\\n]"));
}); });
/* Canonical SimpleBracketDelimiedCharacter is "a". */ /* Canonical EscapeSequence is "n". */
it("parses SimpleBracketDelimitedCharacter", function() { it("parses EscapeSequence", function() {
expect('start = [a]').toParseAs(classGrammar(["a"], "[a]")); expect('start = "\\n"' ).toParseAs(literalGrammar("\n"));
expect('start = "\\0"' ).toParseAs(literalGrammar("\x00"));
expect('start = "\\xFF"' ).toParseAs(literalGrammar("\xFF"));
expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF"));
expect('start = []]' ).toFailToParse(); expect('start = "\\09"').toFailToParse();
expect('start = [\\]').toFailToParse();
expect('start = [\n]').toFailToParse();
}); });
/* Canonical SimpleEscapeSequence is "\\n". */ /* Canonical CharacterEscapeSequence is "n". */
it("parses SimpleEscapeSequence", function() { it("parses CharacterEscapeSequence", function() {
expect('start = "\\b"').toParseAs(literalGrammar("\b"));
expect('start = "\\f"').toParseAs(literalGrammar("\f"));
expect('start = "\\n"').toParseAs(literalGrammar("\n")); expect('start = "\\n"').toParseAs(literalGrammar("\n"));
expect('start = "\\r"').toParseAs(literalGrammar("\r"));
expect('start = "\\t"').toParseAs(literalGrammar("\t"));
expect('start = "\\v"').toParseAs(literalGrammar("\x0B")); // no "\v" in IE
expect('start = "\\a"').toParseAs(literalGrammar("a")); expect('start = "\\a"').toParseAs(literalGrammar("a"));
expect('start = "\\1"').toFailToParse();
expect('start = "\\x"').toFailToParse();
expect('start = "\\u"').toFailToParse();
}); });
/* Canonical ZeroEscapeSequence is "\\0". */ /* Canonical SingleEscapeCharacter is "n". */
it("parses ZeroEscapeSequence", function() { it("parses SingleEscapeCharacter", function() {
expect('start = "\\0"').toParseAs(literalGrammar("\x00")); expect('start = "\\\'"').toParseAs(literalGrammar("'"));
expect('start = "\\""' ).toParseAs(literalGrammar('"'));
expect('start = "\\\\"').toParseAs(literalGrammar("\\"));
expect('start = "\\b"' ).toParseAs(literalGrammar("\b"));
expect('start = "\\f"' ).toParseAs(literalGrammar("\f"));
expect('start = "\\n"' ).toParseAs(literalGrammar("\n"));
expect('start = "\\r"' ).toParseAs(literalGrammar("\r"));
expect('start = "\\t"' ).toParseAs(literalGrammar("\t"));
expect('start = "\\v"' ).toParseAs(literalGrammar("\x0B")); // no "\v" in IE
});
expect('start = "\\00"').toFailToParse(); /* Canonical NonEscapeCharacter is "a". */
expect('start = "\\09"').toFailToParse(); it("parses NonEscapeCharacter", function() {
expect('start = "\\a"').toParseAs(literalGrammar("a"));
}); });
/* Canonical HexEscapeSequence is "\\xFF". */ /* The EscapeCharacter rule is not tested. */
/* Canonical HexEscapeSequence is "xFF". */
it("parses HexEscapeSequence", function() { it("parses HexEscapeSequence", function() {
expect('start = "\\xFF"').toParseAs(literalGrammar("\xFF")); expect('start = "\\xFF"').toParseAs(literalGrammar("\xFF"));
}); });
/* Canonical UnicodeEscapeSequence is "\\uFFFF". */ /* Canonical UnicodeEscapeSequence is "uFFFF". */
it("parses UnicodeEscapeSequence", function() { it("parses UnicodeEscapeSequence", function() {
expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF")); expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF"));
}); });
/* Canonical EOLEscapeSequence is "\\\n". */ /* Digit rules are not tested. */
it("parses EOLEscapeSequence", function() {
expect('start = "\\\r\n"').toParseAs(literalGrammar(""));
});
/* Trivial character class rules are not tested. */
/* Unicode character category rules are not tested. */ /* Unicode character category rules are not tested. */

@ -50,7 +50,7 @@ Initializer
Rule Rule
= name:Identifier __ = name:Identifier __
displayName:(String __)? displayName:(StringLiteral __)?
"=" __ "=" __
expression:Expression (__ ";")? { expression:Expression (__ ";")? {
return { return {
@ -158,14 +158,14 @@ Suffixed
/ Primary / Primary
Primary Primary
= name:Identifier !(__ (String __)? "=") { = name:Identifier !(__ (StringLiteral __)? "=") {
return { return {
type: "rule_ref", type: "rule_ref",
name: name name: name
}; };
} }
/ Literal / LiteralMatcher
/ Class / CharacterClassMatcher
/ "." { return { type: "any" }; } / "." { return { type: "any" }; }
/ "(" __ expression:Expression __ ")" { return expression; } / "(" __ expression:Expression __ ")" { return expression; }
@ -216,124 +216,117 @@ NonBraceCharacter
= [^{}] = [^{}]
Identifier "identifier" Identifier "identifier"
= $((Letter / "_") (Letter / Digit / "_")*) = $((Letter / "_") (Letter / DecimalDigit / "_")*)
Literal "literal" Letter
= value:(DoubleQuotedString / SingleQuotedString) flags:"i"? { = LowerCaseLetter
return { / UpperCaseLetter
type: "literal",
value: value,
ignoreCase: flags === "i"
};
}
String "string"
= string:(DoubleQuotedString / SingleQuotedString) { return string; }
DoubleQuotedString
= '"' chars:DoubleQuotedCharacter* '"' { return chars.join(""); }
DoubleQuotedCharacter
= SimpleDoubleQuotedCharacter
/ SimpleEscapeSequence
/ ZeroEscapeSequence
/ HexEscapeSequence
/ UnicodeEscapeSequence
/ EOLEscapeSequence
SimpleDoubleQuotedCharacter
= !('"' / "\\" / LineTerminator) char_:. { return char_; }
SingleQuotedString LowerCaseLetter
= "'" chars:SingleQuotedCharacter* "'" { return chars.join(""); } = [a-z]
SingleQuotedCharacter UpperCaseLetter
= SimpleSingleQuotedCharacter = [A-Z]
/ SimpleEscapeSequence
/ ZeroEscapeSequence
/ HexEscapeSequence
/ UnicodeEscapeSequence
/ EOLEscapeSequence
SimpleSingleQuotedCharacter LiteralMatcher "literal"
= !("'" / "\\" / LineTerminator) char_:. { return char_; } = value:StringLiteral ignoreCase:"i"? {
return { type: "literal", value: value, ignoreCase: ignoreCase !== null };
}
Class "character class" StringLiteral "string"
= "[" inverted:"^"? parts:(ClassCharacterRange / ClassCharacter)* "]" flags:"i"? { = '"' chars:DoubleStringCharacter* '"' { return chars.join(""); }
/ "'" chars:SingleStringCharacter* "'" { return chars.join(""); }
DoubleStringCharacter
= !('"' / "\\" / LineTerminator) SourceCharacter { return text(); }
/ "\\" sequence:EscapeSequence { return sequence; }
/ LineContinuation
SingleStringCharacter
= !("'" / "\\" / LineTerminator) SourceCharacter { return text(); }
/ "\\" sequence:EscapeSequence { return sequence; }
/ LineContinuation
CharacterClassMatcher "character class"
= "["
inverted:"^"?
parts:(ClassCharacterRange / ClassCharacter)*
"]"
ignoreCase:"i"?
{
return { return {
type: "class", type: "class",
parts: parts, parts: parts,
rawText: text().replace(/\s+$/, ""), inverted: inverted !== null,
inverted: inverted === "^", ignoreCase: ignoreCase !== null,
ignoreCase: flags === "i" rawText: text()
}; };
} }
ClassCharacterRange ClassCharacterRange
= begin:ClassCharacter "-" end:ClassCharacter { = begin:ClassCharacter "-" end:ClassCharacter {
if (begin.charCodeAt(0) > end.charCodeAt(0)) { if (begin.charCodeAt(0) > end.charCodeAt(0)) {
error("Invalid character range: " + text() + "."); error(
"Invalid character range: " + text() + "."
);
} }
return [begin, end]; return [begin, end];
} }
ClassCharacter ClassCharacter
= BracketDelimitedCharacter = !("]" / "\\" / LineTerminator) SourceCharacter { return text(); }
/ "\\" sequence:EscapeSequence { return sequence; }
/ LineContinuation
LineContinuation
= "\\" LineTerminatorSequence { return ""; }
BracketDelimitedCharacter EscapeSequence
= SimpleBracketDelimitedCharacter = CharacterEscapeSequence
/ SimpleEscapeSequence / "0" !DecimalDigit { return "\0"; }
/ ZeroEscapeSequence
/ HexEscapeSequence / HexEscapeSequence
/ UnicodeEscapeSequence / UnicodeEscapeSequence
/ EOLEscapeSequence
SimpleBracketDelimitedCharacter
= !("]" / "\\" / LineTerminator) char_:. { return char_; }
SimpleEscapeSequence
= "\\" !(Digit / "x" / "u" / LineTerminator) char_:. {
return char_
.replace("b", "\b")
.replace("f", "\f")
.replace("n", "\n")
.replace("r", "\r")
.replace("t", "\t")
.replace("v", "\x0B"); // IE does not recognize "\v".
}
ZeroEscapeSequence CharacterEscapeSequence
= "\\0" !Digit { return "\x00"; } = SingleEscapeCharacter
/ NonEscapeCharacter
SingleEscapeCharacter
= "'"
/ '"'
/ "\\"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "v" { return "\x0B"; } // IE does not recognize "\v".
NonEscapeCharacter
= !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }
EscapeCharacter
= SingleEscapeCharacter
/ DecimalDigit
/ "x"
/ "u"
HexEscapeSequence HexEscapeSequence
= "\\x" digits:$(HexDigit HexDigit) { = "x" digits:$(HexDigit HexDigit) {
return String.fromCharCode(parseInt(digits, 16)); return String.fromCharCode(parseInt(digits, 16));
} }
UnicodeEscapeSequence UnicodeEscapeSequence
= "\\u" digits:$(HexDigit HexDigit HexDigit HexDigit) { = "u" digits:$(HexDigit HexDigit HexDigit HexDigit) {
return String.fromCharCode(parseInt(digits, 16)); return String.fromCharCode(parseInt(digits, 16));
} }
EOLEscapeSequence DecimalDigit
= "\\" eol:LineTerminatorSequence { return ""; }
Digit
= [0-9] = [0-9]
HexDigit HexDigit
= [0-9a-fA-F] = [0-9a-f]i
Letter
= LowerCaseLetter
/ UpperCaseLetter
LowerCaseLetter
= [a-z]
UpperCaseLetter
= [A-Z]
/* /*
* Unicode Character Categories * Unicode Character Categories

Loading…
Cancel
Save