Complete rewrite of the JSON example grammar

This is a complete rewrite of the JSON example grammar. It is now based
on RFC 7159 instead of an informal description at the JSON website.

Besides this, the rewrite reflects how I write grammars today (as opposed
to a few years ago) and what style I would recommend to others.
David Majda 10 years ago
parent f5443d2bf1
commit fba70833dd

@ -1,119 +1,132 @@
/* JSON parser based on the grammar described at */ /*
* JSON Grammar
* ============
* Based on the grammar from RFC 7159 [1].
* Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
* JSON website [4] (somewhat informally). The RFC seems the most authoritative
* source, which is confirmed e.g. by [5].
* [1]
* [2]
* [3]
* [4]
* [5]
/* ===== Syntactical Elements ===== */ /* ----- 2. JSON Grammar ----- */
start JSON_text
= _ object:object { return object; } = ws value:value ws { return value; }
object begin_array = ws "[" ws
= "{" _ "}" _ { return {}; } begin_object = ws "{" ws
/ "{" _ members:members "}" _ { return members; } end_array = ws "]" ws
end_object = ws "}" ws
members name_separator = ws ":" ws
= head:pair tail:("," _ pair)* { value_separator = ws "," ws
var result = {};
result[head[0]] = head[1];
for (var i = 0; i < tail.length; i++) {
result[tail[i][2][0]] = tail[i][2][1];
return result;
pair ws "whitespace" = [ \t\n\r]*
= name:string ":" _ value:value { return [name, value]; }
array /* ----- 3. Values ----- */
= "[" _ "]" _ { return []; }
/ "[" _ elements:elements "]" _ { return elements; }
= head:value tail:("," _ value)* {
var result = [head];
for (var i = 0; i < tail.length; i++) {
return result;
value value
= string = false
/ number / null
/ true
/ object / object
/ array / array
/ "true" _ { return true; } / number
/ "false" _ { return false; } / string
/ "null" _ { return null; }
/* ===== Lexical Elements ===== */ false = "false" { return false; }
null = "null" { return null; }
true = "true" { return true; }
string "string" /* ----- 4. Objects ----- */
= '"' '"' _ { return ""; }
/ '"' chars:chars '"' _ { return chars; }
chars object
= chars:char+ { return chars.join(""); } = begin_object
rest:(value_separator m:member { return m; })*
var result = {}, i;
char result[] = first.value;
// In the original JSON grammar: "any-Unicode-character-except-"-or-\-or-control-character"
= [^"\\\0-\x1F\x7f] for (i = 0; i < rest.length; i++) {
/ '\\"' { return '"'; } result[rest[i].name] = rest[i].value;
/ "\\\\" { return "\\"; }
/ "\\/" { return "/"; }
/ "\\b" { return "\b"; }
/ "\\f" { return "\f"; }
/ "\\n" { return "\n"; }
/ "\\r" { return "\r"; }
/ "\\t" { return "\t"; }
/ "\\u" digits:$(hexDigit hexDigit hexDigit hexDigit) {
return String.fromCharCode(parseInt(digits, 16));
} }
number "number" return result;
= parts:$(int frac exp) _ { return parseFloat(parts); } }
/ parts:$(int frac) _ { return parseFloat(parts); } )?
/ parts:$(int exp) _ { return parseFloat(parts); } end_object
/ parts:$(int) _ { return parseFloat(parts); } { return members !== null ? members: {}; }
int member
= digit19 digits = name:string name_separator value:value {
/ digit return { name: name, value: value };
/ "-" digit19 digits }
/ "-" digit
frac /* ----- 5. Arrays ----- */
= "." digits
exp array
= e digits = begin_array
rest:(value_separator v:value { return v; })*
{ return [first].concat(rest); }
{ return values !== null ? values : []; }
digits /* ----- 6. Numbers ----- */
= digit+
e number "number"
= [eE] [+-]? = minus? int frac? exp? { return parseFloat(text()); }
/* decimal_point = "."
* The following rules are not present in the original JSON gramar, but they are digit1_9 = [1-9]
* assumed to exist implicitly. e = [eE]
* exp = e (minus / plus)? DIGIT+
* FIXME: Define them according to ECMA-262, 5th ed. frac = decimal_point DIGIT+
*/ int = zero / (digit1_9 DIGIT*)
minus = "-"
plus = "+"
zero = "0"
digit /* ----- 7. Strings ----- */
= [0-9]
digit19 string "string"
= [1-9] = quotation_mark chars:char* quotation_mark { return chars.join(""); }
hexDigit char
= [0-9a-fA-F] = unescaped
/ escape
/ "\\"
/ "/"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
return String.fromCharCode(parseInt(digits, 16));
{ return sequence; }
/* ===== Whitespace ===== */ escape = "\\"
quotation_mark = '"'
// RFC 7159 defines unescaped = %x20-21 / %x23-5B / %x5D-10FFFF. A "\u" escape
// in a PEG.js character class takes exactly four hex digits, so writing the
// upper bound as "\u10FFFF" yields the range \x5D-\u10FF plus two literal "F"
// characters and rejects everything above U+10FF. Excluding the control
// characters, the quotation mark (\x22) and the backslash (\x5C) instead
// accepts every remaining UTF-16 code unit, which covers the full RFC range.
unescaped = [^\0-\x1F\x22\x5C]
_ "whitespace" /* ----- Core ABNF Rules ----- */
= whitespace*
// Whitespace is undefined in the original JSON grammar, so I assume a simple /* See RFC 4234, Appendix B ( */
// conventional definition consistent with ECMA-262, 5th ed. DIGIT = [0-9]
whitespace HEXDIG = [0-9a-f]i
= [ \t\n\r]
