Browse Source
This is a complete rewrite of the JSON example grammar. It is now based on RFC 7159 instead of an informal description on the JSON website. Besides this, the rewrite reflects how I write grammars today (as opposed to a few years ago) and what style I would recommend to others.
redux
1 changed files with 108 additions and 95 deletions
@ -1,119 +1,132 @@ |
|||
/* JSON parser based on the grammar described at http://json.org/. */ |
|||
/* |
|||
* JSON Grammar |
|||
* ============ |
|||
* |
|||
* Based on the grammar from RFC 7159 [1]. |
|||
* |
|||
* Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the |
|||
* JSON website [4] (somewhat informally). The RFC seems the most authoritative |
|||
* source, which is confirmed e.g. by [5]. |
|||
* |
|||
* [1] http://tools.ietf.org/html/rfc7159 |
|||
* [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm |
|||
* [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm |
|||
* [4] http://json.org/ |
|||
* [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON |
|||
*/ |
|||
|
|||
/* ===== Syntactical Elements ===== */ |
|||
/* ----- 2. JSON Grammar ----- */ |
|||
|
|||
start |
|||
= _ object:object { return object; } |
|||
JSON_text |
|||
= ws value:value ws { return value; } |
|||
|
|||
object |
|||
= "{" _ "}" _ { return {}; } |
|||
/ "{" _ members:members "}" _ { return members; } |
|||
|
|||
members |
|||
= head:pair tail:("," _ pair)* { |
|||
var result = {}; |
|||
result[head[0]] = head[1]; |
|||
for (var i = 0; i < tail.length; i++) { |
|||
result[tail[i][2][0]] = tail[i][2][1]; |
|||
} |
|||
return result; |
|||
} |
|||
begin_array = ws "[" ws |
|||
begin_object = ws "{" ws |
|||
end_array = ws "]" ws |
|||
end_object = ws "}" ws |
|||
name_separator = ws ":" ws |
|||
value_separator = ws "," ws |
|||
|
|||
pair |
|||
= name:string ":" _ value:value { return [name, value]; } |
|||
ws "whitespace" = [ \t\n\r]* |
|||
|
|||
array |
|||
= "[" _ "]" _ { return []; } |
|||
/ "[" _ elements:elements "]" _ { return elements; } |
|||
|
|||
elements |
|||
= head:value tail:("," _ value)* { |
|||
var result = [head]; |
|||
for (var i = 0; i < tail.length; i++) { |
|||
result.push(tail[i][2]); |
|||
} |
|||
return result; |
|||
} |
|||
/* ----- 3. Values ----- */ |
|||
|
|||
value |
|||
= string |
|||
/ number |
|||
= false |
|||
/ null |
|||
/ true |
|||
/ object |
|||
/ array |
|||
/ "true" _ { return true; } |
|||
/ "false" _ { return false; } |
|||
/ "null" _ { return null; } |
|||
|
|||
/* ===== Lexical Elements ===== */ |
|||
|
|||
string "string" |
|||
= '"' '"' _ { return ""; } |
|||
/ '"' chars:chars '"' _ { return chars; } |
|||
/ number |
|||
/ string |
|||
|
|||
chars |
|||
= chars:char+ { return chars.join(""); } |
|||
false = "false" { return false; } |
|||
null = "null" { return null; } |
|||
true = "true" { return true; } |
|||
|
|||
char |
|||
// In the original JSON grammar: "any-Unicode-character-except-"-or-\-or-control-character" |
|||
= [^"\\\0-\x1F\x7f] |
|||
/ '\\"' { return '"'; } |
|||
/ "\\\\" { return "\\"; } |
|||
/ "\\/" { return "/"; } |
|||
/ "\\b" { return "\b"; } |
|||
/ "\\f" { return "\f"; } |
|||
/ "\\n" { return "\n"; } |
|||
/ "\\r" { return "\r"; } |
|||
/ "\\t" { return "\t"; } |
|||
/ "\\u" digits:$(hexDigit hexDigit hexDigit hexDigit) { |
|||
return String.fromCharCode(parseInt(digits, 16)); |
|||
} |
|||
/* ----- 4. Objects ----- */ |
|||
|
|||
number "number" |
|||
= parts:$(int frac exp) _ { return parseFloat(parts); } |
|||
/ parts:$(int frac) _ { return parseFloat(parts); } |
|||
/ parts:$(int exp) _ { return parseFloat(parts); } |
|||
/ parts:$(int) _ { return parseFloat(parts); } |
|||
object |
|||
= begin_object |
|||
members:( |
|||
first:member |
|||
rest:(value_separator m:member { return m; })* |
|||
{ |
|||
var result = {}, i; |
|||
|
|||
int |
|||
= digit19 digits |
|||
/ digit |
|||
/ "-" digit19 digits |
|||
/ "-" digit |
|||
result[first.name] = first.value; |
|||
|
|||
frac |
|||
= "." digits |
|||
for (i = 0; i < rest.length; i++) { |
|||
result[rest[i].name] = rest[i].value; |
|||
} |
|||
|
|||
exp |
|||
= e digits |
|||
return result; |
|||
} |
|||
)? |
|||
end_object |
|||
{ return members !== null ? members: {}; } |
|||
|
|||
digits |
|||
= digit+ |
|||
member |
|||
= name:string name_separator value:value { |
|||
return { name: name, value: value }; |
|||
} |
|||
|
|||
e |
|||
= [eE] [+-]? |
|||
/* ----- 5. Arrays ----- */ |
|||
|
|||
/* |
|||
* The following rules are not present in the original JSON grammar, but they are |
|||
* assumed to exist implicitly. |
|||
* |
|||
* FIXME: Define them according to ECMA-262, 5th ed. |
|||
*/ |
|||
array |
|||
= begin_array |
|||
values:( |
|||
first:value |
|||
rest:(value_separator v:value { return v; })* |
|||
{ return [first].concat(rest); } |
|||
)? |
|||
end_array |
|||
{ return values !== null ? values : []; } |
|||
|
|||
digit |
|||
= [0-9] |
|||
/* ----- 6. Numbers ----- */ |
|||
|
|||
digit19 |
|||
= [1-9] |
|||
number "number" |
|||
= minus? int frac? exp? { return parseFloat(text()); } |
|||
|
|||
hexDigit |
|||
= [0-9a-fA-F] |
|||
decimal_point = "." |
|||
digit1_9 = [1-9] |
|||
e = [eE] |
|||
exp = e (minus / plus)? DIGIT+ |
|||
frac = decimal_point DIGIT+ |
|||
int = zero / (digit1_9 DIGIT*) |
|||
minus = "-" |
|||
plus = "+" |
|||
zero = "0" |
|||
|
|||
/* ===== Whitespace ===== */ |
|||
/* ----- 7. Strings ----- */ |
|||
|
|||
_ "whitespace" |
|||
= whitespace* |
|||
string "string" |
|||
= quotation_mark chars:char* quotation_mark { return chars.join(""); } |
|||
|
|||
// Whitespace is undefined in the original JSON grammar, so I assume a simple |
|||
// conventional definition consistent with ECMA-262, 5th ed. |
|||
whitespace |
|||
= [ \t\n\r] |
|||
char |
|||
= unescaped |
|||
/ escape |
|||
sequence:( |
|||
'"' |
|||
/ "\\" |
|||
/ "/" |
|||
/ "b" { return "\b"; } |
|||
/ "f" { return "\f"; } |
|||
/ "n" { return "\n"; } |
|||
/ "r" { return "\r"; } |
|||
/ "t" { return "\t"; } |
|||
/ "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) { |
|||
return String.fromCharCode(parseInt(digits, 16)); |
|||
} |
|||
) |
|||
{ return sequence; } |
|||
|
|||
escape = "\\" |
|||
quotation_mark = '"' |
|||
unescaped = [\x20-\x21\x23-\x5B\x5D-\u10FFFF] |
|||
|
|||
/* ----- Core ABNF Rules ----- */ |
|||
|
|||
/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). */ |
|||
DIGIT = [0-9] |
|||
HEXDIG = [0-9a-f]i |
|||
|
Loading…
Reference in new issue