You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
367 lines
8.1 KiB
JavaScript
367 lines
8.1 KiB
JavaScript
{
|
|
var utils = require("./utils");
|
|
}
|
|
|
|
/*
 * Top-level rule: leading whitespace/comments, an optional initializer and
 * one or more rules. The name of the first rule is recorded as the start
 * rule.
 */
grammar
  = __ init:initializer? ruleList:rule+ {
      var firstRule = ruleList[0];

      return {
        type:        "grammar",
        // An empty-string result for the optional part is stored as null.
        initializer: init === "" ? null : init,
        rules:       ruleList,
        startRule:   firstRule.name
      };
    }
|
|
|
|
/*
 * An action block, optionally followed by a semicolon. Produces an
 * "initializer" node carrying the action's code.
 */
initializer
  = body:action semicolon? {
      var node = { type: "initializer" };

      node.code = body;

      return node;
    }
|
|
|
|
/*
 * A rule definition: identifier, optional display-name string, "=", the rule
 * expression and an optional semicolon. When a display name is given, the
 * expression is wrapped in a "named" node.
 */
rule
  = ruleName:identifier humanName:string? equals body:expression semicolon? {
      var ruleExpression = body;

      // A display name that is not the empty string wraps the expression.
      if (humanName !== "") {
        ruleExpression = {
          type:       "named",
          name:       humanName,
          expression: body
        };
      }

      return {
        type:       "rule",
        name:       ruleName,
        expression: ruleExpression
      };
    }
|
|
|
|
/* An expression is simply a choice; this rule is an alias for it. */
expression
  = choice
|
|
|
|
/*
 * One sequence followed by zero or more "/ sequence" pairs. A lone sequence
 * is returned as-is; otherwise a "choice" node listing all alternatives in
 * source order is produced.
 */
choice
  = first:sequence rest:(slash sequence)* {
      // Nothing after the first sequence: no wrapper node is needed.
      if (rest.length === 0) {
        return first;
      }

      // Each entry of the tail is a [slash, sequence] pair; keep the sequence.
      var others = utils.map(rest, function(pair) { return pair[1]; });

      return {
        type:         "choice",
        alternatives: [first].concat(others)
      };
    }
|
|
|
|
/*
 * Zero or more labeled elements, optionally followed by an action.
 *
 * Exactly one element is used directly; any other count (including zero) is
 * wrapped in a "sequence" node. With an action present, the result is in turn
 * wrapped in an "action" node.
 */
sequence
  = items:labeled* body:action {
      var inner = items.length === 1
        ? items[0]
        : { type: "sequence", elements: items };

      return {
        type:       "action",
        expression: inner,
        code:       body
      };
    }
  / items:labeled* {
      if (items.length === 1) {
        return items[0];
      }

      return {
        type:     "sequence",
        elements: items
      };
    }
|
|
|
|
/*
 * Either "label:prefixed", which yields a "labeled" node, or a bare prefixed
 * expression, which is passed through unchanged.
 */
labeled
  = tag:identifier colon inner:prefixed {
      var node = { type: "labeled" };

      node.label      = tag;
      node.expression = inner;

      return node;
    }
  / prefixed
|
|
|
|
/*
 * Expressions with an optional "&" or "!" prefix.
 *
 * A prefix followed by an action gives a semantic predicate node
 * ("semantic_and" / "semantic_not"); a prefix followed by a suffixed
 * expression gives a syntactic predicate node ("simple_and" / "simple_not").
 * Without a prefix the suffixed expression is passed through.
 */
prefixed
  = and predicate:action {
      var node = { type: "semantic_and" };
      node.code = predicate;
      return node;
    }
  / and operand:suffixed {
      var node = { type: "simple_and" };
      node.expression = operand;
      return node;
    }
  / not predicate:action {
      var node = { type: "semantic_not" };
      node.code = predicate;
      return node;
    }
  / not operand:suffixed {
      var node = { type: "simple_not" };
      node.expression = operand;
      return node;
    }
  / suffixed
|
|
|
|
/*
 * A primary expression with an optional "?", "*" or "+" suffix, producing an
 * "optional", "zero_or_more" or "one_or_more" node respectively. Without a
 * suffix the primary expression is passed through.
 */
suffixed
  = operand:primary question {
      var node = { type: "optional" };
      node.expression = operand;
      return node;
    }
  / operand:primary star {
      var node = { type: "zero_or_more" };
      node.expression = operand;
      return node;
    }
  / operand:primary plus {
      var node = { type: "one_or_more" };
      node.expression = operand;
      return node;
    }
  / primary
|
|
|
|
/*
 * The atoms of the expression syntax: a rule reference, a literal, a
 * character class, the "any character" dot, or a parenthesized expression.
 *
 * The negative lookahead after the identifier rejects identifiers that are
 * followed by an optional string and "=", i.e. the shape that begins a rule
 * definition.
 */
primary
  = ruleName:identifier !(string? equals) {
      var reference = { type: "rule_ref" };
      reference.name = ruleName;
      return reference;
    }
  / literal
  / class
  / dot {
      var anyNode = { type: "any" };
      return anyNode;
    }
  / lparen inner:expression rparen {
      // Parentheses only group; the inner node is returned as-is.
      return inner;
    }
|
|
|
|
/* "Lexical" elements */
|
|
|
|
/*
 * A brace-delimited code block followed by optional whitespace/comments.
 * Returns the text between the outermost braces (the braces are dropped).
 */
action "action"
  = block:braced __ {
      // Drop the first and the last character, which are the outer braces.
      return block.substring(1, block.length - 1);
    }
|
|
|
|
/*
 * A balanced brace-delimited block: nested blocks and runs of non-brace
 * characters between an opening and a closing brace. Returns the full text,
 * outer braces included.
 */
braced
  = "{" pieces:(braced / nonBraceCharacters)* "}" {
      var inner = pieces.join("");

      return "{" + inner + "}";
    }
|
|
|
|
/* One or more characters that are not braces, returned as a single string. */
nonBraceCharacters
  = run:nonBraceCharacter+ {
      var text = run.join("");
      return text;
    }
|
|
|
|
/* Any single character other than an opening or a closing brace. */
nonBraceCharacter
  = [^{}]
|
|
|
|
/*
 * Punctuation tokens. Each rule matches its single character, skips any
 * following whitespace/comments via "__" and returns that character.
 */
equals    = tok:"=" __ { return tok; }
colon     = tok:":" __ { return tok; }
semicolon = tok:";" __ { return tok; }
slash     = tok:"/" __ { return tok; }
and       = tok:"&" __ { return tok; }
not       = tok:"!" __ { return tok; }
question  = tok:"?" __ { return tok; }
star      = tok:"*" __ { return tok; }
plus      = tok:"+" __ { return tok; }
lparen    = tok:"(" __ { return tok; }
rparen    = tok:")" __ { return tok; }
dot       = tok:"." __ { return tok; }
|
|
|
|
/*
|
|
* Modeled after ECMA-262, 5th ed., 7.6, but much simplified:
|
|
*
|
|
* * no Unicode escape sequences
|
|
*
|
|
* * "Unicode combining marks" and "Unicode connection punctuation" can't be
|
|
* part of the identifier
|
|
*
|
|
* * only [a-zA-Z] is considered a "Unicode letter"
|
|
*
|
|
* * only [0-9] is considered a "Unicode digit"
|
|
*
|
|
* The simplifications were made just to make the implementation a little bit
|
|
* easier, there is no "philosophical" reason behind them.
|
|
*/
|
|
/*
 * An identifier: a letter, "_" or "$", followed by any number of letters,
 * digits, "_" or "$", then optional whitespace/comments. Returns the
 * identifier text without the trailing whitespace.
 */
identifier "identifier"
  = first:(letter / "_" / "$") rest:(letter / digit / "_" / "$")* __ {
      var characters = [first].concat(rest);

      return characters.join("");
    }
|
|
|
|
/*
|
|
* Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
|
|
* vaguely).
|
|
*/
|
|
/*
 * A double- or single-quoted string with an optional "i" flag, followed by
 * optional whitespace/comments. Produces a "literal" node; "ignoreCase" is
 * true exactly when the "i" flag is present.
 */
literal "literal"
  = text:(doubleQuotedString / singleQuotedString) caseFlag:"i"? __ {
      var isCaseInsensitive = caseFlag === "i";

      return {
        type:       "literal",
        value:      text,
        ignoreCase: isCaseInsensitive
      };
    }
|
|
|
|
/*
 * A double- or single-quoted string followed by optional
 * whitespace/comments. Returns the value produced by the quoted-string rule.
 */
string "string"
  = text:(doubleQuotedString / singleQuotedString) __ {
      return text;
    }
|
|
|
|
/*
 * A string in double quotes. Returns the concatenation of the values of its
 * characters; the quotes themselves are not included.
 */
doubleQuotedString
  = '"' contents:doubleQuotedCharacter* '"' {
      var value = contents.join("");
      return value;
    }
|
|
|
|
/*
 * One element of a double-quoted string: a plain character or one of the
 * escape sequence forms, tried in the order listed.
 */
doubleQuotedCharacter
  = simpleDoubleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence
|
|
|
|
/*
 * Any single character that is not a double quote, a backslash or an
 * end-of-line character. Returns the character.
 */
simpleDoubleQuotedCharacter
  = !('"' / "\\" / eolChar) ch:. {
      return ch;
    }
|
|
|
|
/*
 * A string in single quotes. Returns the concatenation of the values of its
 * characters; the quotes themselves are not included.
 */
singleQuotedString
  = "'" contents:singleQuotedCharacter* "'" {
      var value = contents.join("");
      return value;
    }
|
|
|
|
/*
 * One element of a single-quoted string: a plain character or one of the
 * escape sequence forms, tried in the order listed.
 */
singleQuotedCharacter
  = simpleSingleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence
|
|
|
|
/*
 * Any single character that is not a single quote, a backslash or an
 * end-of-line character. Returns the character.
 */
simpleSingleQuotedCharacter
  = !("'" / "\\" / eolChar) ch:. {
      return ch;
    }
|
|
|
|
/*
 * A character class: "[", an optional "^", any number of ranges or single
 * characters, "]", an optional "i" flag and optional whitespace/comments.
 *
 * Produces a "class" node holding the parts' data, a raw text rebuilt from
 * the parts, and the "inverted" / "ignoreCase" booleans.
 */
class "character class"
  = "[" caret:"^"? items:(classCharacterRange / classCharacter)* "]" caseFlag:"i"? __ {
      var dataParts = utils.map(items, function(item) { return item.data; });
      var rawParts  = utils.map(items, function(item) { return item.rawText; });

      // FIXME: Get the raw text from the input directly.
      var rebuiltText = "[" + caret + rawParts.join("") + "]" + caseFlag;

      return {
        type:       "class",
        parts:      dataParts,
        rawText:    rebuiltText,
        inverted:   caret === "^",
        ignoreCase: caseFlag === "i"
      };
    }
|
|
|
|
/*
 * A "begin-end" range inside a character class.
 *
 * Throws this.SyntaxError when the first character code of the range start
 * is greater than that of the range end. Otherwise returns the pair of
 * characters as data together with a rebuilt raw text.
 */
classCharacterRange
  = from:classCharacter "-" to:classCharacter {
      // FIXME: Get the raw text from the input directly.
      var rangeText = from.rawText + "-" + to.rawText;
      var fromCode  = from.data.charCodeAt(0);
      var toCode    = to.data.charCodeAt(0);

      if (fromCode > toCode) {
        throw new this.SyntaxError(
          "Invalid character range: " + rangeText + "."
        );
      }

      return {
        data:    [from.data, to.data],
        rawText: rangeText
      };
    }
|
|
|
|
/*
 * A single character inside a character class. Returns the character as
 * data together with a raw text obtained from utils.quoteForRegexpClass.
 */
classCharacter
  = ch:bracketDelimitedCharacter {
      var part = { data: ch };

      // FIXME: Get the raw text from the input directly.
      part.rawText = utils.quoteForRegexpClass(ch);

      return part;
    }
|
|
|
|
/*
 * One element inside "[...]": a plain character or one of the escape
 * sequence forms, tried in the order listed.
 */
bracketDelimitedCharacter
  = simpleBracketDelimitedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence
|
|
|
|
/*
 * Any single character that is not "]", a backslash or an end-of-line
 * character. Returns the character.
 */
simpleBracketDelimitedCharacter
  = !("]" / "\\" / eolChar) ch:. {
      return ch;
    }
|
|
|
|
/*
 * A backslash followed by one character that is not a digit, "x", "u" or an
 * end-of-line character.
 *
 * The letters b, f, n, r, t and v map to the corresponding control
 * characters; any other character stands for itself.
 */
simpleEscapeSequence
  = "\\" !(digit / "x" / "u" / eolChar) ch:. {
      var controlCharacters = {
        b: "\b",
        f: "\f",
        n: "\n",
        r: "\r",
        t: "\t",
        v: "\x0B"   // IE does not recognize "\v".
      };

      return Object.prototype.hasOwnProperty.call(controlCharacters, ch)
        ? controlCharacters[ch]
        : ch;
    }
|
|
|
|
/*
 * A backslash and "0" not followed by another digit. Returns the character
 * with code 0.
 */
zeroEscapeSequence
  = "\\0" !digit {
      return String.fromCharCode(0);
    }
|
|
|
|
/*
 * A backslash, "x" and exactly two hex digits. Returns the character whose
 * code is the value of those digits.
 */
hexEscapeSequence
  = "\\x" high:hexDigit low:hexDigit {
      var code = parseInt(high + low, 16);

      return String.fromCharCode(code);
    }
|
|
|
|
/*
 * A backslash, "u" and exactly four hex digits. Returns the character whose
 * code is the value of those digits.
 */
unicodeEscapeSequence
  = "\\u" d1:hexDigit d2:hexDigit d3:hexDigit d4:hexDigit {
      var code = parseInt(d1 + d2 + d3 + d4, 16);

      return String.fromCharCode(code);
    }
|
|
|
|
/*
 * A backslash immediately followed by an end of line. Returns the matched
 * end-of-line text.
 */
eolEscapeSequence
  = "\\" lineBreak:eol {
      return lineBreak;
    }
|
|
|
|
/* A single decimal digit. */
digit
  = [0-9]

/* A single hexadecimal digit, in either letter case. */
hexDigit
  = [0-9a-fA-F]

/* A single ASCII letter, lower or upper case. */
letter
  = lowerCaseLetter
  / upperCaseLetter

/* A single ASCII lower-case letter. */
lowerCaseLetter
  = [a-z]

/* A single ASCII upper-case letter. */
upperCaseLetter
  = [A-Z]
|
|
|
|
/* Skips any run (possibly empty) of whitespace, ends of line and comments. */
__ = (whitespace / eol / comment)*
|
|
|
|
/* Modeled after ECMA-262, 5th ed., 7.4. */
|
|
// A comment is either a single-line or a multi-line comment.
comment "comment"
  = singleLineComment
  / multiLineComment

// Two slashes followed by every character up to, but not including, the next
// end-of-line character.
singleLineComment
  = "//" (!eolChar .)*

// Slash-star, every character up to the first star-slash, then the
// star-slash itself. Multi-line comments therefore do not nest.
multiLineComment
  = "/*" (!"*/" .)* "*/"
|
|
|
|
/* Modeled after ECMA-262, 5th ed., 7.3. */
|
|
/*
 * One end-of-line sequence. "\r\n" is listed before "\r" so that a CR LF
 * pair is consumed as a single end of line.
 */
eol "end of line"
  = "\n"
  / "\r\n"
  / "\r"
  / "\u2028"
  / "\u2029"
|
|
|
|
/* A single character that can be part of an end-of-line sequence. */
eolChar
  = [\n\r\u2028\u2029]
|
|
|
|
/* Modeled after ECMA-262, 5th ed., 7.2. */
|
|
/*
 * A single whitespace character: space, tab, vertical tab, form feed,
 * no-break space, U+FEFF, or one of the other Unicode space characters
 * listed in the class.
 */
whitespace "whitespace"
  = [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
|