You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
356 lines
7.5 KiB
JavaScript
356 lines
7.5 KiB
JavaScript
{
|
|
var utils = require("./utils");
|
|
}
|
|
|
|
/*
 * Start rule: leading whitespace/comments, an optional initializer block and
 * one or more rules. Produces the root "grammar" node.
 */
Grammar
  = __ init:Initializer? ruleList:Rule+ {
      var node = { type: "grammar" };

      node.initializer = init;
      node.rules = ruleList;

      return node;
    }
|
|
|
|
/* An action block, optionally followed by a semicolon. */
Initializer
  = source:Action Semicolon? {
      return { type: "initializer", code: source };
    }
|
|
|
|
/*
 * A rule definition: identifier, optional human-readable name, "=", the rule
 * body and an optional trailing semicolon. When a human-readable name is
 * given, the body is wrapped in a "named" node.
 */
Rule
  = ruleName:Identifier humanName:String? Equals body:Expression Semicolon? {
      var wrapped = body;

      if (humanName !== null) {
        wrapped = { type: "named", name: humanName, expression: body };
      }

      return { type: "rule", name: ruleName, expression: wrapped };
    }
|
|
|
|
/* Entry point for parsing expressions; simply delegates to Choice. */
Expression
|
|
= Choice
|
|
|
|
/*
 * One or more sequences separated by "/". A lone sequence is returned as-is;
 * otherwise the sequences become the alternatives of a "choice" node.
 */
Choice
  = first:Sequence rest:(Slash Sequence)* {
      if (rest.length === 0) {
        return first;
      }

      // Each element of |rest| is a [Slash, Sequence] pair; keep the sequence.
      return {
        type: "choice",
        alternatives: [first].concat(utils.map(rest, function(pair) {
          return pair[1];
        }))
      };
    }
|
|
|
|
/*
 * One or more labeled expressions, optionally followed by an action block.
 *
 * A single element is returned unwrapped; several elements become a
 * "sequence" node. If an action follows, the result is wrapped in an "action"
 * node carrying the action's code.
 *
 * The action is matched with "?" so that the elements are parsed only once
 * and the node-building code is not duplicated across two alternatives. This
 * relies on "?" yielding null when nothing matched, which the Rule action in
 * this grammar also assumes. An empty action ("{}") yields "" rather than
 * null and therefore still produces an "action" node.
 */
Sequence
  = elements:Labeled+ code:Action? {
      var expression = elements.length !== 1
        ? { type: "sequence", elements: elements }
        : elements[0];

      if (code === null) {
        return expression;
      }

      return {
        type: "action",
        expression: expression,
        code: code
      };
    }
|
|
|
|
/* An optionally labeled ("label:expression") prefixed expression. */
Labeled
  = name:Identifier Colon target:Prefixed {
      return { type: "labeled", label: name, expression: target };
    }
  / Prefixed
|
|
|
|
/*
 * Prefix operators: "$" (text), "&" and "!" (predicates). For "&" and "!" the
 * semantic form (operator followed by an action block) is tried before the
 * syntactic form (operator followed by a suffixed expression).
 */
Prefixed
  = Dollar operand:Suffixed {
      return { type: "text", expression: operand };
    }
  / And predicate:Action {
      return { type: "semantic_and", code: predicate };
    }
  / And operand:Suffixed {
      return { type: "simple_and", expression: operand };
    }
  / Not predicate:Action {
      return { type: "semantic_not", code: predicate };
    }
  / Not operand:Suffixed {
      return { type: "simple_not", expression: operand };
    }
  / Suffixed
|
|
|
|
/*
 * A primary expression with an optional postfix operator: "?" (optional),
 * "*" (zero or more) or "+" (one or more). Without an operator the primary
 * expression is returned unchanged.
 *
 * The primary expression is matched once and the operator is optional, so a
 * failed operator match does not force the (possibly deeply parenthesized)
 * primary expression to be parsed again for every alternative.
 */
Suffixed
  = expression:Primary operator:(Question / Star / Plus)? {
      // Keys are the strings returned by the Question, Star and Plus rules.
      var types = {
        "?": "optional",
        "*": "zero_or_more",
        "+": "one_or_more"
      };

      if (operator === null) {
        return expression;
      }

      return {
        type: types[operator],
        expression: expression
      };
    }
|
|
|
|
/*
 * Atomic expressions: rule reference, literal, character class, "." (any
 * character) and a parenthesized expression.
 *
 * An identifier counts as a rule reference only when it is not followed by an
 * optional string and "=" -- that shape is the head of a rule definition (see
 * Rule), so the identifier is left for it.
 */
Primary
  = ref:Identifier !(String? Equals) {
      return { type: "rule_ref", name: ref };
    }
  / Literal
  / Class
  / Dot { return { type: "any" }; }
  / Lparen inner:Expression Rparen { return inner; }
|
|
|
|
/* "Lexical" elements */
|
|
|
|
/* A braced code block; yields the text between the outermost braces. */
Action "action"
  = block:Braced __ { return block.slice(1, -1); }
|
|
|
|
/*
 * A "{ ... }" block with balanced nested braces, returned as raw text via "$".
 * Braces are matched purely by character: the rule has no notion of string
 * literals or comments inside the block.
 */
Braced
|
|
= $("{" (Braced / NonBraceCharacters)* "}")
|
|
|
|
/* A run of one or more characters that are neither "{" nor "}". */
NonBraceCharacters
|
|
= NonBraceCharacter+
|
|
|
|
/* Any single character except "{" and "}". */
NonBraceCharacter
|
|
= [^{}]
|
|
|
|
/*
 * Punctuation tokens. Each one matches its literal, skips any whitespace and
 * comments that follow, and yields the literal itself.
 */
Equals   = token:"=" __ { return token; }
Colon    = token:":" __ { return token; }
Semicolon = token:";" __ { return token; }
Slash    = token:"/" __ { return token; }
And      = token:"&" __ { return token; }
Not      = token:"!" __ { return token; }
Dollar   = token:"$" __ { return token; }
Question = token:"?" __ { return token; }
Star     = token:"*" __ { return token; }
Plus     = token:"+" __ { return token; }
Lparen   = token:"(" __ { return token; }
Rparen   = token:")" __ { return token; }
Dot      = token:"." __ { return token; }
|
|
|
|
/*
|
|
* Modeled after ECMA-262, 5th ed., 7.6, but much simplified:
|
|
*
|
|
* * no Unicode escape sequences
|
|
*
|
|
* * "Unicode combining marks" and "Unicode connection punctuation" can't be
|
|
* part of the identifier
|
|
*
|
|
* * only [a-zA-Z] is considered a "Unicode letter"
|
|
*
|
|
* * only [0-9] is considered a "Unicode digit"
|
|
*
|
|
* The simplifications were made just to make the implementation a little bit
|
|
* easier, there is no "philosophical" reason behind them.
|
|
*
|
|
* Contrary to ECMA-262, the "$" character is not valid because it serves another
|
|
* purpose in the grammar.
|
|
*/
|
|
/*
 * A letter or "_" followed by any number of letters, digits or "_". Yields the
 * identifier text; trailing whitespace and comments are skipped.
 */
Identifier "identifier"
  = name:$((Letter / "_") (Letter / Digit / "_")*) __ { return name; }
|
|
|
|
/*
|
|
* Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
|
|
* vaguely).
|
|
*/
|
|
/*
 * A quoted string used as a literal expression, with an optional "i" flag
 * directly after the closing quote that marks it case-insensitive.
 */
Literal "literal"
  = chars:(DoubleQuotedString / SingleQuotedString) flag:"i"? __ {
      return { type: "literal", value: chars, ignoreCase: flag === "i" };
    }
|
|
|
|
/* A double- or single-quoted string; yields its decoded value. */
String "string"
  = value:(DoubleQuotedString / SingleQuotedString) __ { return value; }
|
|
|
|
/* Characters between double quotes, concatenated into one string. */
DoubleQuotedString
  = '"' pieces:DoubleQuotedCharacter* '"' { return pieces.join(""); }
|
|
|
|
/*
 * One element of a double-quoted string: either a plain character or one of
 * the supported escape sequences.
 */
DoubleQuotedCharacter
|
|
= SimpleDoubleQuotedCharacter
|
|
/ SimpleEscapeSequence
|
|
/ ZeroEscapeSequence
|
|
/ HexEscapeSequence
|
|
/ UnicodeEscapeSequence
|
|
/ EOLEscapeSequence
|
|
|
|
/* Any character except a double quote, a backslash or a line terminator. */
SimpleDoubleQuotedCharacter
  = !('"' / "\\" / EOLChar) ch:. { return ch; }
|
|
|
|
/* Characters between single quotes, concatenated into one string. */
SingleQuotedString
  = "'" pieces:SingleQuotedCharacter* "'" { return pieces.join(""); }
|
|
|
|
/*
 * One element of a single-quoted string: either a plain character or one of
 * the supported escape sequences.
 */
SingleQuotedCharacter
|
|
= SimpleSingleQuotedCharacter
|
|
/ SimpleEscapeSequence
|
|
/ ZeroEscapeSequence
|
|
/ HexEscapeSequence
|
|
/ UnicodeEscapeSequence
|
|
/ EOLEscapeSequence
|
|
|
|
/* Any character except a single quote, a backslash or a line terminator. */
SimpleSingleQuotedCharacter
  = !("'" / "\\" / EOLChar) ch:. { return ch; }
|
|
|
|
/*
 * A character class such as [a-z], [^0-9] or [abc]i.
 *
 * The node is built in an inner action, before the trailing "__" is matched;
 * the outer action only passes the finished node through.
 */
Class "character class"
  = node:(
      "[" negated:"^"? items:(ClassCharacterRange / ClassCharacter)* "]" flag:"i"? {
        return {
          type: "class",
          parts: items,
          rawText: text().replace(/\s+$/, ""),
          inverted: negated === "^",
          ignoreCase: flag === "i"
        };
      }
    )
    __
    { return node; }
|
|
|
|
/*
 * A "begin-end" range inside a character class, yielded as a [begin, end]
 * pair. Reports an error when the first character's code is greater than the
 * last character's code.
 */
ClassCharacterRange
  = first:ClassCharacter "-" last:ClassCharacter {
      var isReversed = first.charCodeAt(0) > last.charCodeAt(0);

      if (isReversed) {
        error("Invalid character range: " + text() + ".");
      }

      return [first, last];
    }
|
|
|
|
/* A single character inside a character class; alias of BracketDelimitedCharacter. */
ClassCharacter
|
|
= BracketDelimitedCharacter
|
|
|
|
/*
 * One element between "[" and "]": either a plain character or one of the
 * supported escape sequences.
 */
BracketDelimitedCharacter
|
|
= SimpleBracketDelimitedCharacter
|
|
/ SimpleEscapeSequence
|
|
/ ZeroEscapeSequence
|
|
/ HexEscapeSequence
|
|
/ UnicodeEscapeSequence
|
|
/ EOLEscapeSequence
|
|
|
|
/* Any character except "]", a backslash or a line terminator. */
SimpleBracketDelimitedCharacter
  = !("]" / "\\" / EOLChar) ch:. { return ch; }
|
|
|
|
/*
 * A backslash followed by a single character that does not start another
 * kind of escape (digit, "x", "u" or a line terminator). The letters b, f, n,
 * r, t and v map to their control characters; any other character stands for
 * itself.
 */
SimpleEscapeSequence
  = "\\" !(Digit / "x" / "u" / EOLChar) escaped:. {
      switch (escaped) {
        case "b": return "\b";
        case "f": return "\f";
        case "n": return "\n";
        case "r": return "\r";
        case "t": return "\t";
        case "v": return "\x0B"; // IE does not recognize "\v".
        default:  return escaped;
      }
    }
|
|
|
|
/* "\0" not followed by another digit; yields the NUL character (U+0000). */
ZeroEscapeSequence
|
|
= "\\0" !Digit { return "\x00"; }
|
|
|
|
/* "\x" followed by exactly two hex digits; yields the character with that code. */
HexEscapeSequence
  = "\\x" hex:$(HexDigit HexDigit) {
      var code = parseInt(hex, 16);

      return String.fromCharCode(code);
    }
|
|
|
|
/* "\u" followed by exactly four hex digits; yields the character with that code. */
UnicodeEscapeSequence
  = "\\u" hex:$(HexDigit HexDigit HexDigit HexDigit) {
      var code = parseInt(hex, 16);

      return String.fromCharCode(code);
    }
|
|
|
|
/*
 * A backslash immediately followed by a line terminator. It contributes no
 * characters to the surrounding string or class, so the matched terminator
 * needs no label.
 */
EOLEscapeSequence
  = "\\" EOL { return ""; }
|
|
|
|
/* A single ASCII decimal digit. */
Digit
|
|
= [0-9]
|
|
|
|
/* A single hexadecimal digit, in either letter case. */
HexDigit
|
|
= [0-9a-fA-F]
|
|
|
|
/* A single ASCII letter, lower or upper case. */
Letter
|
|
= LowerCaseLetter
|
|
/ UpperCaseLetter
|
|
|
|
/* A single ASCII lower-case letter. */
LowerCaseLetter
|
|
= [a-z]
|
|
|
|
/* A single ASCII upper-case letter. */
UpperCaseLetter
|
|
= [A-Z]
|
|
|
|
/*
 * Insignificant filler: any run (possibly empty) of whitespace, line
 * terminators and comments. Token-level rules match it after their own text.
 */
__ = (Whitespace / EOL / Comment)*
|
|
|
|
/* Modeled after ECMA-262, 5th ed., 7.4. */
|
|
/* A single-line ("// ...") or multi-line comment. */
Comment "comment"
|
|
= SingleLineComment
|
|
/ MultiLineComment
|
|
|
|
/*
 * "//" followed by everything up to, but not including, the next line
 * terminator character (or the end of input).
 */
SingleLineComment
|
|
= "//" (!EOLChar .)*
|
|
|
|
/*
 * A block comment: the opening delimiter, any characters up to the first
 * closing delimiter, then the closing delimiter. Block comments do not nest.
 */
MultiLineComment
|
|
= "/*" (!"*/" .)* "*/"
|
|
|
|
/* Modeled after ECMA-262, 5th ed., 7.3. */
|
|
/*
 * One line terminator sequence. "\r\n" is listed before "\r" so that a CRLF
 * pair is consumed as a single terminator.
 */
EOL "end of line"
|
|
= "\n"
|
|
/ "\r\n"
|
|
/ "\r"
|
|
/ "\u2028"
|
|
/ "\u2029"
|
|
|
|
/* A single line terminator character (LF, CR, U+2028 or U+2029). */
EOLChar
|
|
= [\n\r\u2028\u2029]
|
|
|
|
/* Modeled after ECMA-262, 5th ed., 7.2. */
|
|
/*
 * A single whitespace character: space, tab, vertical tab, form feed,
 * U+00A0, U+FEFF and the other Unicode space characters listed in the class.
 * Line terminators are not included; they are matched by EOL.
 */
Whitespace "whitespace"
|
|
= [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
|