Complete rewrite of the JSON example grammar
This is a complete rewrite of the JSON example grammar. It is now based on RFC 7159 instead of an informal description at the JSON website. Besides this, the rewrite reflects how I write grammars today (as opposed to a few years ago) and what style I would recommend to others.
redux
parent
f5443d2bf1
commit
fba70833dd
@ -1,119 +1,132 @@
|
|||||||
/* JSON parser based on the grammar described at http://json.org/. */
|
/*
|
||||||
|
* JSON Grammar
|
||||||
|
* ============
|
||||||
|
*
|
||||||
|
* Based on the grammar from RFC 7159 [1].
|
||||||
|
*
|
||||||
|
* Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
|
||||||
|
* JSON website [4] (somewhat informally). The RFC seems the most authoritative
|
||||||
|
* source, which is confirmed e.g. by [5].
|
||||||
|
*
|
||||||
|
* [1] http://tools.ietf.org/html/rfc7159
|
||||||
|
* [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
||||||
|
* [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm
|
||||||
|
* [4] http://json.org/
|
||||||
|
* [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON
|
||||||
|
*/
|
||||||
|
|
||||||
/* ===== Syntactical Elements ===== */
|
/* ----- 2. JSON Grammar ----- */
|
||||||
|
|
||||||
start
|
JSON_text
|
||||||
= _ object:object { return object; }
|
= ws value:value ws { return value; }
|
||||||
|
|
||||||
object
|
begin_array = ws "[" ws
|
||||||
= "{" _ "}" _ { return {}; }
|
begin_object = ws "{" ws
|
||||||
/ "{" _ members:members "}" _ { return members; }
|
end_array = ws "]" ws
|
||||||
|
end_object = ws "}" ws
|
||||||
members
|
name_separator = ws ":" ws
|
||||||
= head:pair tail:("," _ pair)* {
|
value_separator = ws "," ws
|
||||||
var result = {};
|
|
||||||
result[head[0]] = head[1];
|
|
||||||
for (var i = 0; i < tail.length; i++) {
|
|
||||||
result[tail[i][2][0]] = tail[i][2][1];
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
pair
|
ws "whitespace" = [ \t\n\r]*
|
||||||
= name:string ":" _ value:value { return [name, value]; }
|
|
||||||
|
|
||||||
array
|
/* ----- 3. Values ----- */
|
||||||
= "[" _ "]" _ { return []; }
|
|
||||||
/ "[" _ elements:elements "]" _ { return elements; }
|
|
||||||
|
|
||||||
elements
|
|
||||||
= head:value tail:("," _ value)* {
|
|
||||||
var result = [head];
|
|
||||||
for (var i = 0; i < tail.length; i++) {
|
|
||||||
result.push(tail[i][2]);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
value
|
value
|
||||||
= string
|
= false
|
||||||
/ number
|
/ null
|
||||||
|
/ true
|
||||||
/ object
|
/ object
|
||||||
/ array
|
/ array
|
||||||
/ "true" _ { return true; }
|
/ number
|
||||||
/ "false" _ { return false; }
|
/ string
|
||||||
/ "null" _ { return null; }
|
|
||||||
|
|
||||||
/* ===== Lexical Elements ===== */
|
false = "false" { return false; }
|
||||||
|
null = "null" { return null; }
|
||||||
|
true = "true" { return true; }
|
||||||
|
|
||||||
string "string"
|
/* ----- 4. Objects ----- */
|
||||||
= '"' '"' _ { return ""; }
|
|
||||||
/ '"' chars:chars '"' _ { return chars; }
|
|
||||||
|
|
||||||
chars
|
object
|
||||||
= chars:char+ { return chars.join(""); }
|
= begin_object
|
||||||
|
members:(
|
||||||
|
first:member
|
||||||
|
rest:(value_separator m:member { return m; })*
|
||||||
|
{
|
||||||
|
var result = {}, i;
|
||||||
|
|
||||||
char
|
result[first.name] = first.value;
|
||||||
// In the original JSON grammar: "any-Unicode-character-except-"-or-\-or-control-character"
|
|
||||||
= [^"\\\0-\x1F\x7f]
|
for (i = 0; i < rest.length; i++) {
|
||||||
/ '\\"' { return '"'; }
|
result[rest[i].name] = rest[i].value;
|
||||||
/ "\\\\" { return "\\"; }
|
|
||||||
/ "\\/" { return "/"; }
|
|
||||||
/ "\\b" { return "\b"; }
|
|
||||||
/ "\\f" { return "\f"; }
|
|
||||||
/ "\\n" { return "\n"; }
|
|
||||||
/ "\\r" { return "\r"; }
|
|
||||||
/ "\\t" { return "\t"; }
|
|
||||||
/ "\\u" digits:$(hexDigit hexDigit hexDigit hexDigit) {
|
|
||||||
return String.fromCharCode(parseInt(digits, 16));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
number "number"
|
return result;
|
||||||
= parts:$(int frac exp) _ { return parseFloat(parts); }
|
}
|
||||||
/ parts:$(int frac) _ { return parseFloat(parts); }
|
)?
|
||||||
/ parts:$(int exp) _ { return parseFloat(parts); }
|
end_object
|
||||||
/ parts:$(int) _ { return parseFloat(parts); }
|
{ return members !== null ? members: {}; }
|
||||||
|
|
||||||
int
|
member
|
||||||
= digit19 digits
|
= name:string name_separator value:value {
|
||||||
/ digit
|
return { name: name, value: value };
|
||||||
/ "-" digit19 digits
|
}
|
||||||
/ "-" digit
|
|
||||||
|
|
||||||
frac
|
/* ----- 5. Arrays ----- */
|
||||||
= "." digits
|
|
||||||
|
|
||||||
exp
|
array
|
||||||
= e digits
|
= begin_array
|
||||||
|
values:(
|
||||||
|
first:value
|
||||||
|
rest:(value_separator v:value { return v; })*
|
||||||
|
{ return [first].concat(rest); }
|
||||||
|
)?
|
||||||
|
end_array
|
||||||
|
{ return values !== null ? values : []; }
|
||||||
|
|
||||||
digits
|
/* ----- 6. Numbers ----- */
|
||||||
= digit+
|
|
||||||
|
|
||||||
e
|
number "number"
|
||||||
= [eE] [+-]?
|
= minus? int frac? exp? { return parseFloat(text()); }
|
||||||
|
|
||||||
/*
|
decimal_point = "."
|
||||||
* The following rules are not present in the original JSON grammar, but they are
|
digit1_9 = [1-9]
|
||||||
* assumed to exist implicitly.
|
e = [eE]
|
||||||
*
|
exp = e (minus / plus)? DIGIT+
|
||||||
* FIXME: Define them according to ECMA-262, 5th ed.
|
frac = decimal_point DIGIT+
|
||||||
*/
|
int = zero / (digit1_9 DIGIT*)
|
||||||
|
minus = "-"
|
||||||
|
plus = "+"
|
||||||
|
zero = "0"
|
||||||
|
|
||||||
digit
|
/* ----- 7. Strings ----- */
|
||||||
= [0-9]
|
|
||||||
|
|
||||||
digit19
|
string "string"
|
||||||
= [1-9]
|
= quotation_mark chars:char* quotation_mark { return chars.join(""); }
|
||||||
|
|
||||||
hexDigit
|
char
|
||||||
= [0-9a-fA-F]
|
= unescaped
|
||||||
|
/ escape
|
||||||
|
sequence:(
|
||||||
|
'"'
|
||||||
|
/ "\\"
|
||||||
|
/ "/"
|
||||||
|
/ "b" { return "\b"; }
|
||||||
|
/ "f" { return "\f"; }
|
||||||
|
/ "n" { return "\n"; }
|
||||||
|
/ "r" { return "\r"; }
|
||||||
|
/ "t" { return "\t"; }
|
||||||
|
/ "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) {
|
||||||
|
return String.fromCharCode(parseInt(digits, 16));
|
||||||
|
}
|
||||||
|
)
|
||||||
|
{ return sequence; }
|
||||||
|
|
||||||
/* ===== Whitespace ===== */
|
escape = "\\"
|
||||||
|
quotation_mark = '"'
|
||||||
|
/*
 * Any character except control characters (U+0000-U+001F), the quotation
 * mark (U+0022) and the backslash (U+005C). A negated class is used because
 * PEG.js "\u" escapes take exactly four hex digits, so the RFC's upper bound
 * %x10FFFF cannot be written as "\u10FFFF" (that would mean U+10FF followed
 * by two literal "F" characters).
 */
unescaped = [^\0-\x1F\x22\x5C]
|
||||||
|
|
||||||
_ "whitespace"
|
/* ----- Core ABNF Rules ----- */
|
||||||
= whitespace*
|
|
||||||
|
|
||||||
// Whitespace is undefined in the original JSON grammar, so I assume a simple
|
/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). */
|
||||||
// conventional definition consistent with ECMA-262, 5th ed.
|
DIGIT = [0-9]
|
||||||
whitespace
|
HEXDIG = [0-9a-f]i
|
||||||
= [ \t\n\r]
|
|
||||||
|
Loading…
Reference in New Issue