You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pegjs/lib/metagrammar.pegjs

218 lines
5.8 KiB
Plaintext

/*
 * Start rule: optional leading whitespace/comments followed by one or more
 * rules. The result is a plain object mapping each rule's name (as reported
 * by getName()) to the rule; a later rule with the same name replaces an
 * earlier one.
 */
grammar: __ rule+ {
  var rulesByName = {};
  var count = $2.length;
  var index = 0;
  while (index < count) {
    var parsedRule = $2[index];
    rulesByName[parsedRule.getName()] = parsedRule;
    index++;
  }
  return rulesByName;
}
/*
 * A rule definition: name, optional literal, colon, defining expression.
 * When the literal is absent (the empty alternative matched), null is passed
 * to PEG.Grammar.Rule in its place. The literal is presumably a display name
 * for the rule - confirm against PEG.Grammar.Rule.
 */
rule: identifier (literal / "") colon expression {
  var optionalLiteral = $2 === "" ? null : $2;
  return new PEG.Grammar.Rule($1, optionalLiteral, $4);
}
/* An expression is simply a choice; this rule is a plain alias for it. */
expression: choice
/*
 * One or more sequences separated by slashes. A lone sequence is returned
 * unchanged; otherwise all sequences are wrapped in a PEG.Grammar.Choice.
 * Each repetition of the group is a [slash, sequence] pair, so only the
 * second element of every pair is kept.
 */
choice: sequence (slash sequence)* {
  if ($2.length === 0) {
    return $1;
  }
  var alternatives = [$1].concat(PEG.ArrayUtils.map(
    $2,
    function(pair) { return pair[1]; }
  ));
  return new PEG.Grammar.Choice(alternatives);
}
/*
 * Zero or more prefixed expressions, optionally followed by an action.
 * A single expression is used directly; an empty or multi-element list is
 * wrapped in a PEG.Grammar.Sequence. When an action is present, the result
 * is additionally wrapped in a PEG.Grammar.Action carrying the action code.
 */
sequence
  : prefixed* action {
      var body = $1.length !== 1 ? new PEG.Grammar.Sequence($1) : $1[0];
      return new PEG.Grammar.Action(body, $2);
    }
  / prefixed* { return $1.length !== 1 ? new PEG.Grammar.Sequence($1) : $1[0]; }
/*
 * A suffixed expression with an optional "&" or "!" predicate prefix.
 * There is no dedicated node for "&": it is built as a double negation,
 * i.e. a NotPredicate wrapping another NotPredicate.
 */
prefixed
  : and suffixed {
      var negated = new PEG.Grammar.NotPredicate($2);
      return new PEG.Grammar.NotPredicate(negated);
    }
  / not suffixed {
      var operand = $2;
      return new PEG.Grammar.NotPredicate(operand);
    }
  / suffixed
/*
 * A primary expression with an optional "?", "*" or "+" suffix.
 *
 *   e?  is built as a choice between e and the empty literal
 *   e*  is built as a ZeroOrMore node
 *   e+  is built as the sequence (e, e*) with an action that prepends the
 *       first match to the list of remaining matches
 */
suffixed
  : primary question {
      var emptyLiteral = new PEG.Grammar.Literal("");
      return new PEG.Grammar.Choice([$1, emptyLiteral]);
    }
  / primary star {
      var repeated = $1;
      return new PEG.Grammar.ZeroOrMore(repeated);
    }
  / primary plus {
      var remainder = new PEG.Grammar.ZeroOrMore($1);
      var firstThenRemainder = new PEG.Grammar.Sequence([$1, remainder]);
      return new PEG.Grammar.Action(
        firstThenRemainder,
        function(first, rest) { return [first].concat(rest); }
      );
    }
  / primary
/*
 * The atoms of an expression: rule reference, literal, any-character dot,
 * character class, or a parenthesized expression.
 *
 * Rules are not separated by any delimiter, so the negative lookahead after
 * the identifier rejects an identifier that starts the next rule definition
 * (identifier, optional literal, colon) instead of treating it as a rule
 * reference.
 *
 * A character class arrives as a string of all its characters and is turned
 * into a choice of one-character literals.
 */
primary
  : identifier !(( literal / "") colon) {
      var ruleName = $1;
      return new PEG.Grammar.RuleRef(ruleName);
    }
  / literal {
      var text = $1;
      return new PEG.Grammar.Literal(text);
    }
  / dot { return new PEG.Grammar.Any(); }
  / class {
      var characterLiterals = PEG.ArrayUtils.map(
        $1.split(""),
        function(ch) { return new PEG.Grammar.Literal(ch); }
      );
      return new PEG.Grammar.Choice(characterLiterals);
    }
  / lparen expression rparen {
      var inner = $2;
      return inner;
    }
/* "Lexical" elements */
/*
 * An action: a brace-delimited block followed by optional whitespace and
 * comments. The result is the text between the outermost braces.
 */
action "action": braced __ {
  var innerLength = $1.length - 2;
  return $1.substr(1, innerLength);
}
/*
 * A block delimited by braces; nested brace pairs must balance. The result
 * is the complete matched text, including the outer braces.
 */
braced: "{" (braced / nonBraceCharacter)* "}" {
  var pieces = [$1, $2.join(""), $3];
  return pieces.join("");
}
/*
 * A run of one or more non-brace characters, joined into a single string.
 * NOTE(review): no rule visible in this file refers to this rule - confirm
 * that it is still needed.
 */
nonBraceCharacters: nonBraceCharacter+ { return $1.join(""); }
/*
 * Any single character except "{" and "}". The action returns the matched
 * character; the lookahead contributes nothing to the result.
 */
nonBraceCharacter: !("{" / "}") . { return $2; }
/*
 * Punctuation tokens. Each one matches its symbol, then skips whatever
 * whitespace, line ends and comments follow it (the __ rule), and returns
 * the symbol itself.
 */
colon: ":" __ { return $1; }
slash: "/" __ { return $1; }
and: "&" __ { return $1; }
not: "!" __ { return $1; }
question: "?" __ { return $1; }
star: "*" __ { return $1; }
plus: "+" __ { return $1; }
lparen: "(" __ { return $1; }
rparen: ")" __ { return $1; }
dot: "." __ { return $1; }
/*
* Modelled after ECMA-262, 5th ed., 7.6, but much simplified:
*
* * no Unicode escape sequences
*
* * "Unicode combining marks" and "Unicode connection punctuation" can't be
* part of the identifier
*
* * only [a-zA-Z] is considered a "Unicode letter"
*
* * only [0-9] is considered a "Unicode digit"
*
* The simplifications were made just to make the implementation a little bit
* easier; there is no "philosophical" reason behind them.
*/
identifier "identifier": (letter / "_" / "$") (letter / digit / "_" / "$")* __ {
  /* Glue the leading character and the remaining characters back together. */
  var characters = [$1].concat($2);
  return characters.join("");
}
/*
 * Modelled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
 * vaguely).
 *
 * A literal is a double-quoted or single-quoted string followed by optional
 * whitespace and comments. The result is the value produced by the quoted
 * literal rule, i.e. the literal's characters with escape sequences already
 * replaced.
 */
literal "literal": (doubleQuotedLiteral / singleQuotedLiteral) __ { return $1; }
/*
 * A string in double quotes. The result is the concatenation of the values
 * of its characters; the quotes themselves are not part of it.
 */
doubleQuotedLiteral: '"' doubleQuotedCharacter* '"' { return $2.join(""); }
/*
 * One character of a double-quoted literal: a plain character or one of the
 * escape sequences. The alternatives are tried in the order listed.
 */
doubleQuotedCharacter
: simpleDoubleQuotedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
/* Any character except a double quote, a backslash and line-end characters. */
simpleDoubleQuotedCharacter: !('"' / "\\" / eolChar) . { return $2; }
/*
 * A string in single quotes. The result is the concatenation of the values
 * of its characters; the quotes themselves are not part of it.
 */
singleQuotedLiteral: "'" singleQuotedCharacter* "'" { return $2.join(""); }
/*
 * One character of a single-quoted literal: a plain character or one of the
 * escape sequences. The alternatives are tried in the order listed.
 */
singleQuotedCharacter
: simpleSingleQuotedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
/* Any character except a single quote, a backslash and line-end characters. */
simpleSingleQuotedCharacter: !("'" / "\\" / eolChar) . { return $2; }
/*
 * A character class in square brackets, followed by optional whitespace and
 * comments. Each member is either a range (tried first) or a single
 * character. The result is one string made of all member values joined
 * together; ranges are already expanded to their individual characters by
 * classCharacterRange.
 */
class "character class": "[" (classCharacterRange / classCharacter)* "]" __ {
return $2.join("");
}
/*
 * A range such as a-z inside a character class. The result is a string
 * holding every character from the first bound to the last one, inclusive,
 * ordered by character code. Only the first character code of each bound is
 * used. A range whose first bound has a greater code than its last bound is
 * rejected with a PEG.Parser.SyntaxError.
 */
classCharacterRange: bracketDelimitedCharacter "-" bracketDelimitedCharacter {
  var firstCode = $1.charCodeAt(0);
  var lastCode = $3.charCodeAt(0);

  if (firstCode > lastCode) {
    throw new PEG.Parser.SyntaxError(
      "Invalid character range: " + $1 + "-" + $3 + "."
    );
  }

  var characters = [];
  for (var code = firstCode; code <= lastCode; code++) {
    characters.push(String.fromCharCode(code));
  }
  return characters.join("");
}
/* A single member of a character class that is not part of a range. */
classCharacter: bracketDelimitedCharacter
/*
 * One character inside a character class: a plain character or one of the
 * escape sequences. The alternatives are tried in the order listed.
 */
bracketDelimitedCharacter
: simpleBracketDelimitedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
/* Any character except "]", a backslash and line-end characters. */
simpleBracketDelimitedCharacter: !("]" / "\\" / eolChar) . { return $2; }
/*
 * A backslash followed by a single character that is not a digit, "x", "u"
 * or a line-end character. The letters b, f, n, r, t and v yield the
 * corresponding control character; any other character yields itself
 * (so a backslash before a quote yields the quote).
 */
simpleEscapeSequence: "\\" !(digit / "x" / "u" / eolChar) . {
  var controlCharacters = {
    b: "\b",
    f: "\f",
    n: "\n",
    r: "\r",
    t: "\t",
    v: "\v"
  };

  /* Own-property check so that only the six letters above are translated. */
  return Object.prototype.hasOwnProperty.call(controlCharacters, $3)
    ? controlCharacters[$3]
    : $3;
}
/* A backslash and a zero, not followed by another digit, yield the NUL character. */
zeroEscapeSequence: "\\0" !digit { return "\0"; }
/*
 * A backslash, "x" and exactly two hexadecimal digits; yields the character
 * with that character code.
 */
hexEscapeSequence: "\\x" hexDigit hexDigit {
  /* The digits are always hexadecimal, so the radix is stated explicitly. */
  return String.fromCharCode(parseInt($2 + $3, 16));
}
/*
 * A backslash, "u" and exactly four hexadecimal digits; yields the character
 * with that character code.
 */
unicodeEscapeSequence: "\\u" hexDigit hexDigit hexDigit hexDigit {
  /* The digits are always hexadecimal, so the radix is stated explicitly. */
  return String.fromCharCode(parseInt($2 + $3 + $4 + $5, 16));
}
/*
 * A backslash followed by a line end; yields the matched line end itself.
 * NOTE(review): in ECMA-262, 5th ed., 7.8.4 a line continuation contributes
 * nothing to the string value - confirm that keeping the line end here is
 * intended.
 */
eolEscapeSequence: "\\" eol { return $2; }
/*
 * Basic character classes: digit and letter are used by the identifier rule,
 * digit also by the escape sequence lookaheads, and hexDigit by the hex and
 * Unicode escape sequences.
 */
digit: [0-9]
hexDigit: [0-9a-fA-F]
letter: lowerCaseLetter / upperCaseLetter
lowerCaseLetter: [a-z]
upperCaseLetter: [A-Z]
/*
 * Skippable input: any number of whitespace characters, line ends and
 * comments. It may match nothing at all.
 */
__: (whitespace / eol / comment)*
/*
 * Modelled after ECMA-262, 5th ed., 7.4.
 *
 * A single-line comment runs from two slashes up to, but not including, the
 * next line-end character. A multi-line comment runs from the opening
 * slash-star marker to the first closing star-slash marker, so multi-line
 * comments do not nest.
 */
comment "comment": singleLineComment / multiLineComment
singleLineComment: "//" (!eolChar .)*
multiLineComment: "/*" (!"*/" .)* "*/"
/*
 * Modelled after ECMA-262, 5th ed., 7.3.
 *
 * eol matches one complete line end. "\r\n" is listed before "\r" so that a
 * CR LF pair is consumed as a single line end rather than as a lone CR.
 * eolChar matches any single character that can take part in a line end and
 * is used in lookaheads.
 */
eol "end of line": "\n" / "\r\n" / "\r" / "\u2028" / "\u2029"
eolChar: [\n\r\u2028\u2029]
/*
 * Modelled after ECMA-262, 5th ed., 7.2. \uFEFF should be among the
 * characters too, but it causes an infinite loop in Rhino.
 *
 * Matches a single whitespace character; line ends are handled separately
 * by the eol rule.
 */
whitespace "whitespace": [ \t\v\f\xA0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]