You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pegjs/src/parser.pegjs

367 lines
8.1 KiB
JavaScript

{
var utils = require("./utils");
}
/*
 * Entry rule. A grammar is optional leading whitespace/comments, an optional
 * initializer and one or more rules. The name of the first rule is recorded
 * as the start rule.
 */
grammar
  = __ initializer:initializer? rules:rule+ {
      // The optional initializer is compared against "" to detect absence.
      var node = { type: "grammar" };
      node.initializer = initializer === "" ? null : initializer;
      node.rules = rules;
      node.startRule = rules[0].name;
      return node;
    }
/*
 * Grammar initializer: an action block optionally followed by a semicolon.
 * Produces an "initializer" node carrying the action's code.
 */
initializer
  = code:action semicolon? {
      var node = {};
      node.type = "initializer";
      node.code = code;
      return node;
    }
/*
 * A single rule definition: name, optional display name (a string), "=",
 * the rule's expression and an optional trailing semicolon. When a display
 * name is given, the expression is wrapped in a "named" node.
 */
rule
  = name:identifier displayName:string? equals expression:expression semicolon? {
      var body = expression;
      if (displayName !== "") {
        body = {
          type: "named",
          name: displayName,
          expression: expression
        };
      }
      return { type: "rule", name: name, expression: body };
    }
/* Top-level parsing expression; it simply delegates to `choice`. */
expression
= choice
/*
 * One or more sequences separated by "/". A lone sequence is returned
 * unchanged; otherwise a "choice" node listing all alternatives is built.
 */
choice
  = head:sequence tail:(slash sequence)* {
      if (tail.length === 0) {
        return head;
      }

      // Each tail item is a [slash, sequence] pair; keep only the sequence.
      var rest = utils.map(tail, function(pair) { return pair[1]; });
      return {
        type: "choice",
        alternatives: [head].concat(rest)
      };
    }
/*
 * Zero or more labeled elements, optionally followed by an action.
 * Exactly one element is used as-is; any other count (including zero) is
 * wrapped in a "sequence" node. With an action present, the result is
 * further wrapped in an "action" node.
 */
sequence
  = elements:labeled* code:action {
      var inner;
      if (elements.length === 1) {
        inner = elements[0];
      } else {
        inner = { type: "sequence", elements: elements };
      }
      return { type: "action", expression: inner, code: code };
    }
  / elements:labeled* {
      if (elements.length === 1) {
        return elements[0];
      }
      return { type: "sequence", elements: elements };
    }
/*
 * Either `label:expression`, producing a "labeled" node, or a bare
 * prefixed expression.
 */
labeled
  = label:identifier colon expression:prefixed {
      var node = { type: "labeled" };
      node.label = label;
      node.expression = expression;
      return node;
    }
  / prefixed
/*
 * Expressions with an optional "&" or "!" prefix. A prefix followed by an
 * action yields a semantic predicate node; a prefix followed by a suffixed
 * expression yields a simple predicate node. Without a prefix the suffixed
 * expression is returned as-is.
 */
prefixed
  = and code:action {
      var node = { type: "semantic_and" };
      node.code = code;
      return node;
    }
  / and expression:suffixed {
      var node = { type: "simple_and" };
      node.expression = expression;
      return node;
    }
  / not code:action {
      var node = { type: "semantic_not" };
      node.code = code;
      return node;
    }
  / not expression:suffixed {
      var node = { type: "simple_not" };
      node.expression = expression;
      return node;
    }
  / suffixed
/*
 * A primary expression with an optional "?", "*" or "+" suffix, mapped to
 * "optional", "zero_or_more" and "one_or_more" nodes respectively. Without
 * a suffix the primary expression is returned as-is.
 */
suffixed
  = expression:primary question {
      var node = { type: "optional" };
      node.expression = expression;
      return node;
    }
  / expression:primary star {
      var node = { type: "zero_or_more" };
      node.expression = expression;
      return node;
    }
  / expression:primary plus {
      var node = { type: "one_or_more" };
      node.expression = expression;
      return node;
    }
  / primary
/*
 * The atoms of an expression: a rule reference, a literal, a character
 * class, "." (any character) or a parenthesized expression.
 *
 * The negative lookahead on the rule reference rejects an identifier that is
 * followed by an optional string and "=", which is the shape that follows
 * the name in the `rule` definition.
 */
primary
  = name:identifier !(string? equals) {
      var ref = {};
      ref.type = "rule_ref";
      ref.name = name;
      return ref;
    }
  / literal
  / class
  / dot {
      var anyNode = {};
      anyNode.type = "any";
      return anyNode;
    }
  / lparen expression:expression rparen {
      return expression;
    }
/* "Lexical" elements */
/*
 * A brace-delimited code block followed by optional whitespace/comments.
 * Yields the text between the outermost braces.
 */
action "action"
  = braced:braced __ {
      // Drop the first and the last character, i.e. the enclosing braces.
      var lastIndex = braced.length - 1;
      return braced.substring(1, lastIndex);
    }
/*
 * A balanced brace-delimited block. Nested blocks are matched recursively
 * and the whole text, including the outer braces, is returned.
 */
braced
  = "{" parts:(braced / nonBraceCharacters)* "}" {
      var inner = parts.join("");
      return "{" + inner + "}";
    }
/* A run of one or more non-brace characters, returned as a single string. */
nonBraceCharacters
  = chars:nonBraceCharacter+ {
      var text = chars.join("");
      return text;
    }
/* Any single character other than an opening or closing brace. */
nonBraceCharacter
= [^{}]
/*
 * Punctuation tokens. Each rule matches its literal character followed by
 * optional whitespace, line terminators and comments (`__`), and returns the
 * literal character itself.
 */
equals = "=" __ { return "="; }
colon = ":" __ { return ":"; }
semicolon = ";" __ { return ";"; }
slash = "/" __ { return "/"; }
and = "&" __ { return "&"; }
not = "!" __ { return "!"; }
question = "?" __ { return "?"; }
star = "*" __ { return "*"; }
plus = "+" __ { return "+"; }
lparen = "(" __ { return "("; }
rparen = ")" __ { return ")"; }
dot = "." __ { return "."; }
/*
* Modeled after ECMA-262, 5th ed., 7.6, but much simplified:
*
* * no Unicode escape sequences
*
* * "Unicode combining marks" and "Unicode connection punctuation" can't be
* part of the identifier
*
* * only [a-zA-Z] is considered a "Unicode letter"
*
* * only [0-9] is considered a "Unicode digit"
*
* The simplifications were made just to make the implementation a little bit
* easier; there is no "philosophical" reason behind them.
*/
/*
 * An identifier: a letter, "_" or "$", followed by any number of letters,
 * digits, "_" or "$", then optional whitespace/comments. Returns the
 * identifier text.
 */
identifier "identifier"
  = head:(letter / "_" / "$") tail:(letter / digit / "_" / "$")* __ {
      var chars = [head].concat(tail);
      return chars.join("");
    }
/*
* Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
* vaguely).
*/
/*
 * A quoted string used as a literal match, with an optional "i" flag
 * directly after the closing quote. Produces a "literal" node whose
 * `ignoreCase` is true only when the flag is present.
 */
literal "literal"
  = value:(doubleQuotedString / singleQuotedString) flags:"i"? __ {
      var caseInsensitive = flags === "i";
      return {
        type: "literal",
        value: value,
        ignoreCase: caseInsensitive
      };
    }
/*
 * A double- or single-quoted string followed by optional
 * whitespace/comments. Returns the string's decoded contents.
 */
string "string"
  = value:(doubleQuotedString / singleQuotedString) __ {
      return value;
    }
/* Contents of a double-quoted string, with its characters joined together. */
doubleQuotedString
  = '"' chars:doubleQuotedCharacter* '"' {
      var text = chars.join("");
      return text;
    }
/*
 * One character of a double-quoted string: either a plain character or one
 * of the supported escape sequences.
 */
doubleQuotedCharacter
= simpleDoubleQuotedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
/*
 * Any single character except a double quote, a backslash or a line
 * terminator character; returned unchanged.
 */
simpleDoubleQuotedCharacter
= !('"' / "\\" / eolChar) char_:. { return char_; }
/* Contents of a single-quoted string, with its characters joined together. */
singleQuotedString
  = "'" chars:singleQuotedCharacter* "'" {
      var text = chars.join("");
      return text;
    }
/*
 * One character of a single-quoted string: either a plain character or one
 * of the supported escape sequences.
 */
singleQuotedCharacter
= simpleSingleQuotedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
/*
 * Any single character except a single quote, a backslash or a line
 * terminator character; returned unchanged.
 */
simpleSingleQuotedCharacter
= !("'" / "\\" / eolChar) char_:. { return char_; }
/*
 * A bracketed character class with optional "^" inversion and optional "i"
 * flag. Produces a "class" node holding the parsed parts, a reconstructed
 * raw text, and the `inverted` / `ignoreCase` booleans.
 */
class "character class"
  = "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? __ {
      var dataParts = utils.map(parts, function(item) { return item.data; });
      var rawParts = utils.map(parts, function(item) { return item.rawText; });

      // FIXME: Get the raw text from the input directly.
      var rawText = "[" + inverted + rawParts.join("") + "]" + flags;

      var node = { type: "class" };
      node.parts = dataParts;
      node.rawText = rawText;
      node.inverted = inverted === "^";
      node.ignoreCase = flags === "i";
      return node;
    }
/*
 * A range such as a-z inside a character class. Throws a syntax error when
 * the first character's code is greater than the last character's code.
 * Yields the pair of end points plus the range's raw text.
 */
classCharacterRange
  = begin:classCharacter "-" end:classCharacter {
      // FIXME: Get the raw text from the input directly.
      var rangeText = begin.rawText + "-" + end.rawText;
      var firstCode = begin.data.charCodeAt(0);
      var lastCode = end.data.charCodeAt(0);

      if (lastCode < firstCode) {
        throw new this.SyntaxError(
          "Invalid character range: " + rangeText + "."
        );
      }

      return {
        data: [begin.data, end.data],
        rawText: rangeText
      };
    }
/*
 * A single character inside a character class. Yields the character itself
 * as `data` and the result of `utils.quoteForRegexpClass` as `rawText`.
 */
classCharacter
  = char_:bracketDelimitedCharacter {
      var item = { data: char_ };
      // FIXME: Get the raw text from the input directly.
      item.rawText = utils.quoteForRegexpClass(char_);
      return item;
    }
/*
 * One character inside square brackets: either a plain character or one of
 * the supported escape sequences.
 */
bracketDelimitedCharacter
= simpleBracketDelimitedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
/*
 * Any single character except a closing square bracket, a backslash or a
 * line terminator character; returned unchanged.
 */
simpleBracketDelimitedCharacter
= !("]" / "\\" / eolChar) char_:. { return char_; }
/*
 * A backslash followed by one character that is not a digit, "x", "u" or a
 * line terminator character. The letters b, f, n, r, t and v map to their
 * control characters; every other character stands for itself.
 */
simpleEscapeSequence
  = "\\" !(digit / "x" / "u" / eolChar) char_:. {
      switch (char_) {
        case "b": return "\b";
        case "f": return "\f";
        case "n": return "\n";
        case "r": return "\r";
        case "t": return "\t";
        case "v": return "\x0B"; // IE does not recognize "\v".
        default:  return char_;
      }
    }
/*
 * A backslash followed by "0" that is not followed by another digit;
 * yields the character with code 0.
 */
zeroEscapeSequence
= "\\0" !digit { return "\x00"; }
/*
 * A backslash, "x" and exactly two hex digits; yields the character with
 * that code.
 */
hexEscapeSequence
  = "\\x" h1:hexDigit h2:hexDigit {
      var code = parseInt([h1, h2].join(""), 16);
      return String.fromCharCode(code);
    }
/*
 * A backslash, "u" and exactly four hex digits; yields the character with
 * that code.
 */
unicodeEscapeSequence
  = "\\u" h1:hexDigit h2:hexDigit h3:hexDigit h4:hexDigit {
      var code = parseInt([h1, h2, h3, h4].join(""), 16);
      return String.fromCharCode(code);
    }
/*
 * A backslash immediately followed by a line terminator; yields the matched
 * line terminator.
 */
eolEscapeSequence
= "\\" eol:eol { return eol; }
/* A single decimal digit, 0 through 9. */
digit
= [0-9]
/* A single hexadecimal digit; both letter cases are accepted. */
hexDigit
= [0-9a-fA-F]
/* A single lower-case or upper-case letter as defined by the rules below. */
letter
= lowerCaseLetter
/ upperCaseLetter
/* A single lower-case letter in the range a-z. */
lowerCaseLetter
= [a-z]
/* A single upper-case letter in the range A-Z. */
upperCaseLetter
= [A-Z]
/*
 * Skippable input: any number (including zero) of whitespace characters,
 * line terminators and comments.
 */
__ = (whitespace / eol / comment)*
/*
 * Modeled after ECMA-262, 5th ed., 7.4.
 *
 * A comment is either a single-line or a multi-line comment.
 */
comment "comment"
= singleLineComment
/ multiLineComment
/*
 * Two slashes followed by every character up to, but not including, the
 * next line terminator character.
 */
singleLineComment
= "//" (!eolChar .)*
/*
 * A slash-star opener, any characters up to the first star-slash, and that
 * closing star-slash. The comment ends at the first closer, so comments of
 * this kind do not nest.
 */
multiLineComment
= "/*" (!"*/" .)* "*/"
/*
 * Modeled after ECMA-262, 5th ed., 7.3.
 *
 * A line terminator: LF, CR LF, CR, U+2028 or U+2029. The alternatives are
 * tried in the order listed.
 */
eol "end of line"
= "\n"
/ "\r\n"
/ "\r"
/ "\u2028"
/ "\u2029"
/*
 * A single character that can be part of a line terminator: LF, CR, U+2028
 * or U+2029.
 */
eolChar
= [\n\r\u2028\u2029]
/*
 * Modeled after ECMA-262, 5th ed., 7.2.
 *
 * A single whitespace character: space, tab, vertical tab, form feed, or
 * one of the listed Unicode code points (U+00A0, U+FEFF, U+1680, U+180E,
 * U+2000 through U+200A, U+202F, U+205F, U+3000).
 */
whitespace "whitespace"
= [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]