Complete rewrite of the JSON example grammar
This is a complete rewrite of the JSON example grammar. It is now based on RFC 7159 instead of an informal description at the JSON website. Besides this, the rewrite reflects how I write grammars today (as opposed to a few years ago) and what style I would recommend to others.
redux
parent
f5443d2bf1
commit
fba70833dd
@ -1,119 +1,132 @@
|
||||
/* JSON parser based on the grammar described at http://json.org/. */
|
||||
/*
|
||||
* JSON Grammar
|
||||
* ============
|
||||
*
|
||||
* Based on the grammar from RFC 7159 [1].
|
||||
*
|
||||
* Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
|
||||
* JSON website [4] (somewhat informally). The RFC seems the most authoritative
|
||||
* source, which is confirmed e.g. by [5].
|
||||
*
|
||||
* [1] http://tools.ietf.org/html/rfc7159
|
||||
* [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
||||
* [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm
|
||||
* [4] http://json.org/
|
||||
* [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON
|
||||
*/
|
||||
|
||||
/* ===== Syntactical Elements ===== */
|
||||
/* ----- 2. JSON Grammar ----- */
|
||||
|
||||
start
|
||||
= _ object:object { return object; }
|
||||
JSON_text
|
||||
= ws value:value ws { return value; }
|
||||
|
||||
object
|
||||
= "{" _ "}" _ { return {}; }
|
||||
/ "{" _ members:members "}" _ { return members; }
|
||||
|
||||
members
|
||||
= head:pair tail:("," _ pair)* {
|
||||
var result = {};
|
||||
result[head[0]] = head[1];
|
||||
for (var i = 0; i < tail.length; i++) {
|
||||
result[tail[i][2][0]] = tail[i][2][1];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
begin_array = ws "[" ws
|
||||
begin_object = ws "{" ws
|
||||
end_array = ws "]" ws
|
||||
end_object = ws "}" ws
|
||||
name_separator = ws ":" ws
|
||||
value_separator = ws "," ws
|
||||
|
||||
pair
|
||||
= name:string ":" _ value:value { return [name, value]; }
|
||||
ws "whitespace" = [ \t\n\r]*
|
||||
|
||||
array
|
||||
= "[" _ "]" _ { return []; }
|
||||
/ "[" _ elements:elements "]" _ { return elements; }
|
||||
|
||||
elements
|
||||
= head:value tail:("," _ value)* {
|
||||
var result = [head];
|
||||
for (var i = 0; i < tail.length; i++) {
|
||||
result.push(tail[i][2]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
/* ----- 3. Values ----- */
|
||||
|
||||
value
|
||||
= string
|
||||
/ number
|
||||
= false
|
||||
/ null
|
||||
/ true
|
||||
/ object
|
||||
/ array
|
||||
/ "true" _ { return true; }
|
||||
/ "false" _ { return false; }
|
||||
/ "null" _ { return null; }
|
||||
|
||||
/* ===== Lexical Elements ===== */
|
||||
|
||||
string "string"
|
||||
= '"' '"' _ { return ""; }
|
||||
/ '"' chars:chars '"' _ { return chars; }
|
||||
/ number
|
||||
/ string
|
||||
|
||||
chars
|
||||
= chars:char+ { return chars.join(""); }
|
||||
false = "false" { return false; }
|
||||
null = "null" { return null; }
|
||||
true = "true" { return true; }
|
||||
|
||||
char
|
||||
// In the original JSON grammar: "any-Unicode-character-except-"-or-\-or-control-character"
|
||||
= [^"\\\0-\x1F\x7f]
|
||||
/ '\\"' { return '"'; }
|
||||
/ "\\\\" { return "\\"; }
|
||||
/ "\\/" { return "/"; }
|
||||
/ "\\b" { return "\b"; }
|
||||
/ "\\f" { return "\f"; }
|
||||
/ "\\n" { return "\n"; }
|
||||
/ "\\r" { return "\r"; }
|
||||
/ "\\t" { return "\t"; }
|
||||
/ "\\u" digits:$(hexDigit hexDigit hexDigit hexDigit) {
|
||||
return String.fromCharCode(parseInt(digits, 16));
|
||||
}
|
||||
/* ----- 4. Objects ----- */
|
||||
|
||||
number "number"
|
||||
= parts:$(int frac exp) _ { return parseFloat(parts); }
|
||||
/ parts:$(int frac) _ { return parseFloat(parts); }
|
||||
/ parts:$(int exp) _ { return parseFloat(parts); }
|
||||
/ parts:$(int) _ { return parseFloat(parts); }
|
||||
object
|
||||
= begin_object
|
||||
members:(
|
||||
first:member
|
||||
rest:(value_separator m:member { return m; })*
|
||||
{
|
||||
var result = {}, i;
|
||||
|
||||
int
|
||||
= digit19 digits
|
||||
/ digit
|
||||
/ "-" digit19 digits
|
||||
/ "-" digit
|
||||
result[first.name] = first.value;
|
||||
|
||||
frac
|
||||
= "." digits
|
||||
for (i = 0; i < rest.length; i++) {
|
||||
result[rest[i].name] = rest[i].value;
|
||||
}
|
||||
|
||||
exp
|
||||
= e digits
|
||||
return result;
|
||||
}
|
||||
)?
|
||||
end_object
|
||||
{ return members !== null ? members: {}; }
|
||||
|
||||
digits
|
||||
= digit+
|
||||
member
|
||||
= name:string name_separator value:value {
|
||||
return { name: name, value: value };
|
||||
}
|
||||
|
||||
e
|
||||
= [eE] [+-]?
|
||||
/* ----- 5. Arrays ----- */
|
||||
|
||||
/*
|
||||
* The following rules are not present in the original JSON grammar, but they are
|
||||
* assumed to exist implicitly.
|
||||
*
|
||||
* FIXME: Define them according to ECMA-262, 5th ed.
|
||||
*/
|
||||
array
|
||||
= begin_array
|
||||
values:(
|
||||
first:value
|
||||
rest:(value_separator v:value { return v; })*
|
||||
{ return [first].concat(rest); }
|
||||
)?
|
||||
end_array
|
||||
{ return values !== null ? values : []; }
|
||||
|
||||
digit
|
||||
= [0-9]
|
||||
/* ----- 6. Numbers ----- */
|
||||
|
||||
digit19
|
||||
= [1-9]
|
||||
number "number"
|
||||
= minus? int frac? exp? { return parseFloat(text()); }
|
||||
|
||||
hexDigit
|
||||
= [0-9a-fA-F]
|
||||
decimal_point = "."
|
||||
digit1_9 = [1-9]
|
||||
e = [eE]
|
||||
exp = e (minus / plus)? DIGIT+
|
||||
frac = decimal_point DIGIT+
|
||||
int = zero / (digit1_9 DIGIT*)
|
||||
minus = "-"
|
||||
plus = "+"
|
||||
zero = "0"
|
||||
|
||||
/* ===== Whitespace ===== */
|
||||
/* ----- 7. Strings ----- */
|
||||
|
||||
_ "whitespace"
|
||||
= whitespace*
|
||||
string "string"
|
||||
= quotation_mark chars:char* quotation_mark { return chars.join(""); }
|
||||
|
||||
// Whitespace is undefined in the original JSON grammar, so I assume a simple
|
||||
// conventional definition consistent with ECMA-262, 5th ed.
|
||||
whitespace
|
||||
= [ \t\n\r]
|
||||
char
|
||||
= unescaped
|
||||
/ escape
|
||||
sequence:(
|
||||
'"'
|
||||
/ "\\"
|
||||
/ "/"
|
||||
/ "b" { return "\b"; }
|
||||
/ "f" { return "\f"; }
|
||||
/ "n" { return "\n"; }
|
||||
/ "r" { return "\r"; }
|
||||
/ "t" { return "\t"; }
|
||||
/ "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) {
|
||||
return String.fromCharCode(parseInt(digits, 16));
|
||||
}
|
||||
)
|
||||
{ return sequence; }
|
||||
|
||||
escape = "\\"
|
||||
quotation_mark = '"'
|
||||
/*
 * RFC 7159 allows any character unescaped except control characters
 * (U+0000-U+001F), the quotation mark (U+0022) and the reverse solidus
 * (U+005C). A negated class is used because "\u" escapes take exactly four hex
 * digits, so an upper bound of U+10FFFF cannot be written as "\u10FFFF" (it
 * would be read as U+10FF followed by two literal "F" characters).
 */
unescaped = [^\0-\x1F\x22\x5C]
|
||||
|
||||
/* ----- Core ABNF Rules ----- */
|
||||
|
||||
/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). */
|
||||
DIGIT = [0-9]
|
||||
HEXDIG = [0-9a-f]i
|
||||
|
Loading…
Reference in New Issue