|
|
|
@ -1,36 +1,34 @@
|
|
|
|
|
/*
|
|
|
|
|
* JavaScript Grammar
|
|
|
|
|
* ==================
|
|
|
|
|
*
|
|
|
|
|
* Based on grammar from ECMA-262, 5.1 Edition [1]. Generated parser builds a
|
|
|
|
|
* syntax tree compatible with the ESTree spec [2].
|
|
|
|
|
*
|
|
|
|
|
* Limitations:
|
|
|
|
|
*
|
|
|
|
|
* * Non-BMP characters are completely ignored to avoid surrogate pair
|
|
|
|
|
* handling.
|
|
|
|
|
*
|
|
|
|
|
* * One can create identifiers containing illegal characters using Unicode
|
|
|
|
|
* escape sequences. For example, "abcd\u0020efgh" is not a valid
|
|
|
|
|
* identifier, but it is accepted by the parser.
|
|
|
|
|
*
|
|
|
|
|
* * Strict mode is not recognized. This means that within strict mode code,
|
|
|
|
|
* "implements", "interface", "let", "package", "private", "protected",
|
|
|
|
|
* "public", "static" and "yield" can be used as names. Many other
|
|
|
|
|
* restrictions and exceptions from Annex C are also not applied.
|
|
|
|
|
*
|
|
|
|
|
* All the limitations could be resolved, but the costs would likely outweigh
|
|
|
|
|
* the benefits.
|
|
|
|
|
*
|
|
|
|
|
* Many thanks to inimino [3] for his grammar [4] which helped me to solve some
|
|
|
|
|
* problems (such as automatic semicolon insertion) and also served to double
|
|
|
|
|
* check that I converted the original grammar correctly.
|
|
|
|
|
*
|
|
|
|
|
* [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
|
|
|
|
* [2] https://github.com/estree/estree
|
|
|
|
|
* [3] http://inimino.org/~inimino/blog/
|
|
|
|
|
* [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg
|
|
|
|
|
*/
|
|
|
|
|
// JavaScript Grammar
|
|
|
|
|
// ==================
|
|
|
|
|
//
|
|
|
|
|
// Based on grammar from ECMA-262, 5.1 Edition [1]. Generated parser builds a
|
|
|
|
|
// syntax tree compatible with the ESTree spec [2].
|
|
|
|
|
//
|
|
|
|
|
// Limitations:
|
|
|
|
|
//
|
|
|
|
|
// * Non-BMP characters are completely ignored to avoid surrogate pair
|
|
|
|
|
// handling.
|
|
|
|
|
//
|
|
|
|
|
// * One can create identifiers containing illegal characters using Unicode
|
|
|
|
|
// escape sequences. For example, "abcd\u0020efgh" is not a valid
|
|
|
|
|
// identifier, but it is accepted by the parser.
|
|
|
|
|
//
|
|
|
|
|
// * Strict mode is not recognized. This means that within strict mode code,
|
|
|
|
|
// "implements", "interface", "let", "package", "private", "protected",
|
|
|
|
|
// "public", "static" and "yield" can be used as names. Many other
|
|
|
|
|
// restrictions and exceptions from Annex C are also not applied.
|
|
|
|
|
//
|
|
|
|
|
// All the limitations could be resolved, but the costs would likely outweigh
|
|
|
|
|
// the benefits.
|
|
|
|
|
//
|
|
|
|
|
// Many thanks to inimino [3] for his grammar [4] which helped me to solve some
|
|
|
|
|
// problems (such as automatic semicolon insertion) and also served to double
|
|
|
|
|
// check that I converted the original grammar correctly.
|
|
|
|
|
//
|
|
|
|
|
// [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
|
|
|
|
// [2] https://github.com/estree/estree
|
|
|
|
|
// [3] http://inimino.org/~inimino/blog/
|
|
|
|
|
// [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
var TYPES_TO_PROPERTY_NAMES = {
|
|
|
|
@ -85,7 +83,7 @@
|
|
|
|
|
// Grammar entry point: the Program node is returned as the parse result; the
// `__` matches on either side of it contribute nothing to the result.
Start
  = __ ast:Program __ { return ast; }
|
|
|
|
|
|
|
|
|
|
/* ----- A.1 Lexical Grammar ----- */
|
|
|
|
|
// ----- A.1 Lexical Grammar -----
|
|
|
|
|
|
|
|
|
|
// Matches exactly one input character of any kind (PEG "any character" dot).
SourceCharacter
|
|
|
|
|
= .
|
|
|
|
@ -222,10 +220,8 @@ BooleanLiteral
|
|
|
|
|
= TrueToken { return { type: "Literal", value: true }; }
|
|
|
|
|
/ FalseToken { return { type: "Literal", value: false }; }
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official
|
|
|
|
|
* grammar, it comes from text in section 7.8.3.
|
|
|
|
|
*/
|
|
|
|
|
// The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official
|
|
|
|
|
// grammar; it comes from text in section 7.8.3.
|
|
|
|
|
NumericLiteral "number"
|
|
|
|
|
= literal:HexIntegerLiteral !(IdentifierStart / DecimalDigit) {
|
|
|
|
|
return literal;
|
|
|
|
@ -375,30 +371,28 @@ RegularExpressionClassChar
|
|
|
|
|
// Flags are matched as zero or more IdentifierPart characters; no check is
// made here that each one is a meaningful flag letter.
RegularExpressionFlags
|
|
|
|
|
= IdentifierPart*
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Unicode Character Categories
|
|
|
|
|
*
|
|
|
|
|
* Extracted from the following Unicode Character Database file:
|
|
|
|
|
*
|
|
|
|
|
* http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt
|
|
|
|
|
*
|
|
|
|
|
* Unix magic used:
|
|
|
|
|
*
|
|
|
|
|
* grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters
|
|
|
|
|
* cut -f1 -d " " | # Extract code points
|
|
|
|
|
* grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters
|
|
|
|
|
* sed -e 's/\.\./-/' | # Adjust formatting
|
|
|
|
|
* sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting
|
|
|
|
|
* tr -d '\n' # Join lines
|
|
|
|
|
*
|
|
|
|
|
* ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one
|
|
|
|
|
* at the time of writing.
|
|
|
|
|
*
|
|
|
|
|
* Non-BMP characters are completely ignored to avoid surrogate pair handling
|
|
|
|
|
* (detecting surrogate pairs isn't possible with a simple character class and
|
|
|
|
|
* other methods would degrade performance). I don't consider it a big deal as
|
|
|
|
|
* even parsers in JavaScript engines of common browsers seem to ignore them.
|
|
|
|
|
*/
|
|
|
|
|
// Unicode Character Categories
|
|
|
|
|
//
|
|
|
|
|
// Extracted from the following Unicode Character Database file:
|
|
|
|
|
//
|
|
|
|
|
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt
|
|
|
|
|
//
|
|
|
|
|
// Unix magic used:
|
|
|
|
|
//
|
|
|
|
|
// grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters
|
|
|
|
|
// cut -f1 -d " " | # Extract code points
|
|
|
|
|
// grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters
|
|
|
|
|
// sed -e 's/\.\./-/' | # Adjust formatting
|
|
|
|
|
// sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting
|
|
|
|
|
// tr -d '\n' # Join lines
|
|
|
|
|
//
|
|
|
|
|
// ECMA-262 allows using Unicode 3.0 or later; version 8.0.0 was the latest one
|
|
|
|
|
// at the time of writing.
|
|
|
|
|
//
|
|
|
|
|
// Non-BMP characters are completely ignored to avoid surrogate pair handling
|
|
|
|
|
// (detecting surrogate pairs isn't possible with a simple character class and
|
|
|
|
|
// other methods would degrade performance). I don't consider it a big deal as
|
|
|
|
|
// even parsers in JavaScript engines of common browsers seem to ignore them.
|
|
|
|
|
|
|
|
|
|
// Letter, Lowercase (Unicode general category Ll, BMP code points only).
//
// The whole character class must stay on ONE physical line: a raw line break
// inside [...] is not allowed, and a break in the middle of a "\uXXXX" escape
// truncates the escape. Do not hard-wrap this line.
Ll = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0561-\u0587\u13F8-\u13FD\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7B5\uA7B7\uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A]
|
|
|
|
@ -433,7 +427,7 @@ Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F]
|
|
|
|
|
// Separator, Space (Unicode general category Zs). Every code point listed is
// in the BMP, so the class needs no surrogate-pair handling.
|
|
|
|
|
Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]
|
|
|
|
|
|
|
|
|
|
/* Tokens */
|
|
|
|
|
// Tokens
|
|
|
|
|
|
|
|
|
|
// Keyword tokens. The trailing "!IdentifierPart" negative lookahead rejects a
// match when the keyword text is immediately followed by another identifier
// character, so e.g. "breakfast" is not tokenized as "break" + "fast".
BreakToken = "break" !IdentifierPart
|
|
|
|
|
CaseToken = "case" !IdentifierPart
|
|
|
|
@ -474,7 +468,7 @@ VoidToken = "void" !IdentifierPart
|
|
|
|
|
// Keyword tokens. "!IdentifierPart" is a negative lookahead: the keyword only
// matches when it is not the prefix of a longer identifier (e.g. "within").
WhileToken = "while" !IdentifierPart
|
|
|
|
|
WithToken = "with" !IdentifierPart
|
|
|
|
|
|
|
|
|
|
/* Skipped */
|
|
|
|
|
// Skipped
|
|
|
|
|
|
|
|
|
|
// Skips any run (possibly empty) of whitespace, line terminator sequences and
// comments. Because of the "*" this rule always succeeds.
__
|
|
|
|
|
= (WhiteSpace / LineTerminatorSequence / Comment)*
|
|
|
|
@ -482,7 +476,7 @@ __
|
|
|
|
|
// Skips any run (possibly empty) of WhiteSpace and
// MultiLineCommentNoLineTerminator. Unlike "__" there is no
// LineTerminatorSequence alternative here.
// NOTE(review): presumably this is the "same line" skipper used where a line
// break is significant (restricted productions / ASI) - confirm at call sites.
_
|
|
|
|
|
= (WhiteSpace / MultiLineCommentNoLineTerminator)*
|
|
|
|
|
|
|
|
|
|
/* Automatic Semicolon Insertion */
|
|
|
|
|
// Automatic Semicolon Insertion
|
|
|
|
|
|
|
|
|
|
EOS
|
|
|
|
|
= __ ";"
|
|
|
|
@ -493,11 +487,11 @@ EOS
|
|
|
|
|
// End of input: "!." is a negative lookahead on "any character", so it
// succeeds (consuming nothing) only when no character is left to read.
EOF
|
|
|
|
|
= !.
|
|
|
|
|
|
|
|
|
|
/* ----- A.2 Number Conversions ----- */
|
|
|
|
|
// ----- A.2 Number Conversions -----
|
|
|
|
|
|
|
|
|
|
/* Irrelevant. */
|
|
|
|
|
// Irrelevant.
|
|
|
|
|
|
|
|
|
|
/* ----- A.3 Expressions ----- */
|
|
|
|
|
// ----- A.3 Expressions -----
|
|
|
|
|
|
|
|
|
|
PrimaryExpression
|
|
|
|
|
= ThisToken { return { type: "ThisExpression" }; }
|
|
|
|
@ -962,7 +956,7 @@ ExpressionNoIn
|
|
|
|
|
: head;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ----- A.4 Statements ----- */
|
|
|
|
|
// ----- A.4 Statements -----
|
|
|
|
|
|
|
|
|
|
Statement
|
|
|
|
|
= Block
|
|
|
|
@ -1274,7 +1268,7 @@ Finally
|
|
|
|
|
// The "debugger" keyword followed by a statement terminator (EOS). Produces a
// node whose only property is its type.
DebuggerStatement
  = DebuggerToken EOS {
      var node = { type: "DebuggerStatement" };

      return node;
    }
|
|
|
|
|
|
|
|
|
|
/* ----- A.5 Functions and Programs ----- */
|
|
|
|
|
// ----- A.5 Functions and Programs -----
|
|
|
|
|
|
|
|
|
|
FunctionDeclaration
|
|
|
|
|
= FunctionToken __ id:Identifier __
|
|
|
|
@ -1332,14 +1326,14 @@ SourceElement
|
|
|
|
|
= Statement
|
|
|
|
|
/ FunctionDeclaration
|
|
|
|
|
|
|
|
|
|
/* ----- A.6 Universal Resource Identifier Character Classes ----- */
|
|
|
|
|
// ----- A.6 Universal Resource Identifier Character Classes -----
|
|
|
|
|
|
|
|
|
|
/* Irrelevant. */
|
|
|
|
|
// Irrelevant.
|
|
|
|
|
|
|
|
|
|
/* ----- A.7 Regular Expressions ----- */
|
|
|
|
|
// ----- A.7 Regular Expressions -----
|
|
|
|
|
|
|
|
|
|
/* Irrelevant. */
|
|
|
|
|
// Irrelevant.
|
|
|
|
|
|
|
|
|
|
/* ----- A.8 JSON ----- */
|
|
|
|
|
// ----- A.8 JSON -----
|
|
|
|
|
|
|
|
|
|
/* Irrelevant. */
|
|
|
|
|
// Irrelevant.
|
|
|
|
|