/*
 * pegjs/src/parser.pegjs
 * (356 lines, 7.5 KiB, plain text)
 */

// Grammar initializer: loads the "./utils" module so that rule actions can
// call its helpers (the Choice rule uses utils.map).
{
var utils = require("./utils");
}
// Start rule: leading whitespace/comments, an optional initializer block and
// one or more rules. Produces the root "grammar" node.
Grammar
  = __ init:Initializer? ruleList:Rule+ {
      var grammar = {
        type: "grammar",
        initializer: init,
        rules: ruleList
      };

      return grammar;
    }
// An action block, optionally followed by a semicolon. Produces an
// "initializer" node carrying the code found between the braces.
Initializer
  = body:Action Semicolon? {
      var node = { type: "initializer" };
      node.code = body;
      return node;
    }
// A rule definition: name, optional display-name string, "=", the rule's
// expression and an optional trailing semicolon. When a display name is
// present, the expression is wrapped in a "named" node.
Rule
  = ruleName:Identifier humanName:String? Equals body:Expression Semicolon? {
      var node = { type: "rule", name: ruleName, expression: body };

      if (humanName !== null) {
        node.expression = {
          type: "named",
          name: humanName,
          expression: body
        };
      }

      return node;
    }
// Entry point for any parsing expression; it simply delegates to Choice.
Expression
= Choice
// One or more sequences separated by "/". A single sequence is returned
// unchanged; several are wrapped in a "choice" node.
Choice
  = first:Sequence rest:(Slash Sequence)* {
      // A lone sequence is passed through without a wrapper node.
      if (rest.length === 0) {
        return first;
      }

      // Each entry of rest is a [Slash, Sequence] pair; keep the sequence.
      var others = utils.map(rest, function(pair) { return pair[1]; });

      return {
        type: "choice",
        alternatives: [first].concat(others)
      };
    }
/*
 * One or more labeled expressions, optionally followed by an action block.
 *
 * A single element is returned as-is; several elements are wrapped in a
 * "sequence" node. When an action block follows, the result is additionally
 * wrapped in an "action" node carrying the action's code.
 *
 * The action is optional within a single alternative so that the elements are
 * parsed only once. Splitting this into "elements + action" and "elements
 * only" alternatives would re-parse every action-less sequence, and that cost
 * compounds with each level of parenthesized nesting.
 */
Sequence
  = elements:Labeled+ code:Action? {
      var expression = elements.length !== 1
        ? {
            type: "sequence",
            elements: elements
          }
        : elements[0];

      // No action block followed: return the bare expression.
      if (code === null) {
        return expression;
      }

      return {
        type: "action",
        expression: expression,
        code: code
      };
    }
// Either "label:expression", which yields a "labeled" node, or a plain
// prefixed expression.
Labeled
  = tag:Identifier Colon inner:Prefixed {
      var node = { type: "labeled" };
      node.label = tag;
      node.expression = inner;
      return node;
    }
  / Prefixed
/*
 * An expression with an optional prefix operator:
 *
 *   "$" expression  ->  "text" node
 *   "&" action      ->  "semantic_and" node
 *   "&" expression  ->  "simple_and" node
 *   "!" action      ->  "semantic_not" node
 *   "!" expression  ->  "simple_not" node
 *
 * For "&" and "!", the action form is tried before the expression form.
 * Without a prefix the suffixed expression is returned unchanged.
 */
Prefixed
  = Dollar operand:Suffixed {
      return { type: "text", expression: operand };
    }
  / And predicate:Action {
      return { type: "semantic_and", code: predicate };
    }
  / And operand:Suffixed {
      return { type: "simple_and", expression: operand };
    }
  / Not predicate:Action {
      return { type: "semantic_not", code: predicate };
    }
  / Not operand:Suffixed {
      return { type: "simple_not", expression: operand };
    }
  / Suffixed
/*
 * A primary expression with an optional suffix operator:
 *
 *   "?"  ->  "optional" node
 *   "*"  ->  "zero_or_more" node
 *   "+"  ->  "one_or_more" node
 *
 * Without a suffix the primary expression is returned unchanged.
 *
 * The primary expression is matched once and the operator is optional.
 * Listing one alternative per operator would re-parse the primary expression
 * for each failed alternative, which becomes exponential for nested
 * parenthesized expressions.
 */
Suffixed
  = expression:Primary operator:(Question / Star / Plus)? {
      if (operator === null) {
        return expression;
      }

      // Question, Star and Plus return "?", "*" and "+" respectively.
      var type = operator === "?" ? "optional"
               : operator === "*" ? "zero_or_more"
               : "one_or_more";

      return {
        type: type,
        expression: expression
      };
    }
/*
 * The atoms of an expression: a rule reference, a literal, a character class,
 * the "." wildcard, or a parenthesized expression.
 *
 * An identifier counts as a rule reference only when it is not followed by an
 * optional string and "=", because that is how a Rule definition begins.
 */
Primary
  = ref:Identifier !(String? Equals) {
      return { type: "rule_ref", name: ref };
    }
  / Literal
  / Class
  / Dot {
      var anyNode = { type: "any" };
      return anyNode;
    }
  / Lparen inner:Expression Rparen { return inner; }
/* "Lexical" elements */
// An action block followed by optional whitespace/comments. Returns the code
// between the outermost pair of braces.
Action "action"
  = block:Braced __ {
      // Drop the first and the last character, i.e. the enclosing braces.
      return block.slice(1, -1);
    }

// The text of a brace-delimited block including its braces; nested blocks are
// matched recursively so that the braces stay balanced.
Braced
  = $("{" (Braced / NonBraceCharacters)* "}")

NonBraceCharacters
  = NonBraceCharacter+

NonBraceCharacter
  = [^{}]
// Punctuation tokens. Each one matches its literal, consumes any whitespace
// and comments that follow it, and returns the literal text.
Equals    = tok:"=" __ { return tok; }
Colon     = tok:":" __ { return tok; }
Semicolon = tok:";" __ { return tok; }
Slash     = tok:"/" __ { return tok; }
And       = tok:"&" __ { return tok; }
Not       = tok:"!" __ { return tok; }
Dollar    = tok:"$" __ { return tok; }
Question  = tok:"?" __ { return tok; }
Star      = tok:"*" __ { return tok; }
Plus      = tok:"+" __ { return tok; }
Lparen    = tok:"(" __ { return tok; }
Rparen    = tok:")" __ { return tok; }
Dot       = tok:"." __ { return tok; }
/*
* Modeled after ECMA-262, 5th ed., 7.6, but much simplified:
*
* * no Unicode escape sequences
*
* * "Unicode combining marks" and "Unicode connection punctuation" can't be
* part of the identifier
*
* * only [a-zA-Z] is considered a "Unicode letter"
*
* * only [0-9] is considered a "Unicode digit"
*
* The simplifications were made just to make the implementation a little bit
* easier; there is no "philosophical" reason behind them.
*
* Contrary to ECMA-262, the "$" character is not valid because it serves
* another purpose in the grammar.
*/
// A letter or "_" followed by any number of letters, digits or "_", then
// optional whitespace/comments. Returns the identifier text only.
Identifier "identifier"
  = name:$(
      (Letter / "_")
      (Letter / Digit / "_")*
    )
    __
    { return name; }
/*
* Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
* vaguely).
*/
// A quoted string with an optional "i" flag directly after the closing quote.
// Produces a "literal" node; ignoreCase is true only when the flag is present.
Literal "literal"
  = content:(DoubleQuotedString / SingleQuotedString) suffix:"i"? __ {
      var node = { type: "literal", value: content };
      node.ignoreCase = (suffix === "i");
      return node;
    }
// A double- or single-quoted string followed by optional whitespace/comments.
// Returns the string's decoded value.
String "string"
  = value:(DoubleQuotedString / SingleQuotedString) __ {
      return value;
    }
// A string in double quotes. Returns the concatenation of its decoded
// characters, without the quotes.
DoubleQuotedString
  = '"' pieces:DoubleQuotedCharacter* '"' {
      return pieces.join("");
    }

// One character of a double-quoted string: a plain character or one of the
// escape sequences.
DoubleQuotedCharacter
  = SimpleDoubleQuotedCharacter
  / SimpleEscapeSequence
  / ZeroEscapeSequence
  / HexEscapeSequence
  / UnicodeEscapeSequence
  / EOLEscapeSequence

// Any character except the closing quote, a backslash or a line terminator.
SimpleDoubleQuotedCharacter
  = !('"' / "\\" / EOLChar) ch:. { return ch; }
// A string in single quotes. Returns the concatenation of its decoded
// characters, without the quotes.
SingleQuotedString
  = "'" pieces:SingleQuotedCharacter* "'" {
      return pieces.join("");
    }

// One character of a single-quoted string: a plain character or one of the
// escape sequences.
SingleQuotedCharacter
  = SimpleSingleQuotedCharacter
  / SimpleEscapeSequence
  / ZeroEscapeSequence
  / HexEscapeSequence
  / UnicodeEscapeSequence
  / EOLEscapeSequence

// Any character except the closing quote, a backslash or a line terminator.
SimpleSingleQuotedCharacter
  = !("'" / "\\" / EOLChar) ch:. { return ch; }
/*
 * A bracketed character class with an optional "^" inversion marker and an
 * optional "i" flag. Produces a "class" node whose parts are single
 * characters or [begin, end] ranges.
 *
 * The class itself is matched in an inner parenthesized sequence with its own
 * action; the trailing whitespace/comments are consumed outside of it.
 */
Class "character class"
  = node:(
      "[" caret:"^"? members:(ClassCharacterRange / ClassCharacter)* "]" suffix:"i"? {
        var raw = text().replace(/\s+$/, "");

        return {
          type: "class",
          parts: members,
          rawText: raw,
          inverted: caret === "^",
          ignoreCase: suffix === "i"
        };
      }
    )
    __
    { return node; }
// A "begin-end" range inside a character class. Calls error() when the first
// character's code is greater than the second's; returns [begin, end].
ClassCharacterRange
  = lo:ClassCharacter "-" hi:ClassCharacter {
      var loCode = lo.charCodeAt(0),
          hiCode = hi.charCodeAt(0);

      if (loCode > hiCode) {
        error("Invalid character range: " + text() + ".");
      }

      return [lo, hi];
    }
// A single member character of a character class.
ClassCharacter
  = BracketDelimitedCharacter

// A plain character or one of the escape sequences.
BracketDelimitedCharacter
  = SimpleBracketDelimitedCharacter
  / SimpleEscapeSequence
  / ZeroEscapeSequence
  / HexEscapeSequence
  / UnicodeEscapeSequence
  / EOLEscapeSequence

// Any character except the closing bracket, a backslash or a line terminator.
SimpleBracketDelimitedCharacter
  = !("]" / "\\" / EOLChar) ch:. { return ch; }
// A backslash followed by one character that is not a digit, "x", "u" or a
// line terminator. The letters b, f, n, r, t and v map to their control
// characters; any other character stands for itself.
SimpleEscapeSequence
  = "\\" !(Digit / "x" / "u" / EOLChar) ch:. {
      switch (ch) {
        case "b": return "\b";
        case "f": return "\f";
        case "n": return "\n";
        case "r": return "\r";
        case "t": return "\t";
        case "v": return "\x0B"; // IE does not recognize "\v".
        default:  return ch;
      }
    }
// "\0" not followed by another digit: the NUL character.
ZeroEscapeSequence
  = "\\0" !Digit { return "\x00"; }

// "\x" plus two hex digits: the character with that code.
HexEscapeSequence
  = "\\x" hex:$(HexDigit HexDigit) {
      var code = parseInt(hex, 16);
      return String.fromCharCode(code);
    }

// "\u" plus four hex digits: the character with that code.
UnicodeEscapeSequence
  = "\\u" hex:$(HexDigit HexDigit HexDigit HexDigit) {
      var code = parseInt(hex, 16);
      return String.fromCharCode(code);
    }

// A backslash directly before a line ending: contributes nothing to the value.
EOLEscapeSequence
  = "\\" EOL { return ""; }
// Basic ASCII character classes used by Identifier and the escape sequences.

// A decimal digit.
Digit
= [0-9]
// A hexadecimal digit in either case.
HexDigit
= [0-9a-fA-F]
// An ASCII letter in either case.
Letter
= LowerCaseLetter
/ UpperCaseLetter
LowerCaseLetter
= [a-z]
UpperCaseLetter
= [A-Z]
// Skippable input: any run (possibly empty) of whitespace, line endings and
// comments. The token rules consume it after their own text.
__ = (Whitespace / EOL / Comment)*
/* Modeled after ECMA-262, 5th ed., 7.4. */
// Either comment form.
Comment "comment"
= SingleLineComment
/ MultiLineComment
// "//" up to, but not including, the next line terminator.
SingleLineComment
= "//" (!EOLChar .)*
// "/*" up to and including the first "*/"; these comments do not nest.
MultiLineComment
= "/*" (!"*/" .)* "*/"
/* Modeled after ECMA-262, 5th ed., 7.3. */
// One line ending. "\r\n" is listed before "\r" so that a CR LF pair is
// consumed as a single line ending.
EOL "end of line"
= "\n"
/ "\r\n"
/ "\r"
/ "\u2028"
/ "\u2029"
// A single line-terminator character; used in negative lookaheads.
EOLChar
= [\n\r\u2028\u2029]
/* Modeled after ECMA-262, 5th ed., 7.2. */
// One whitespace character: space, tab, vertical tab, form feed, U+00A0,
// U+FEFF, or one of the other Unicode space characters listed in the class.
Whitespace "whitespace"
= [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]