e510ecc3d0
In the past year I worked on various grammars where first/rest or head/tail were used as labels for parts of lists. I found that I associate head/tail with a list immediately, while in the case of first/rest I have to "parse" grammar rules for a while before understanding their structure. Moreover, I tend to assume that rest is a list of the same things as first, but I don't make that assumption in the case of head/tail. This assumption was in conflict with the grammar structure. I'm not sure how much these observations apply to others, but I decided to act on them and switch from first/rest to head/tail.
133 lines
2.9 KiB
JavaScript
133 lines
2.9 KiB
JavaScript
/*
|
|
* JSON Grammar
|
|
* ============
|
|
*
|
|
* Based on the grammar from RFC 7159 [1].
|
|
*
|
|
* Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
|
|
* JSON website [4] (somewhat informally). The RFC seems the most authoritative
|
|
* source, which is confirmed e.g. by [5].
|
|
*
|
|
* [1] http://tools.ietf.org/html/rfc7159
|
|
* [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
|
* [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm
|
|
* [4] http://json.org/
|
|
* [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON
|
|
*/
|
|
|
|
/* ----- 2. JSON Grammar ----- */
|
|
|
|
/* A complete JSON text: one value with optional whitespace on either side. */
JSON_text
  = ws result:value ws { return result; }
|
|
|
|
/* Structural characters; each one may be padded with whitespace on both sides. */
begin_array     = ws "[" ws
begin_object    = ws "{" ws
end_array       = ws "]" ws
end_object      = ws "}" ws
name_separator  = ws ":" ws
value_separator = ws "," ws
|
|
|
|
/* Whitespace: any run (possibly empty) of space, tab, line feed or carriage return. */
ws "whitespace"
  = [ \t\n\r]*
|
|
|
|
/* ----- 3. Values ----- */
|
|
|
|
/* Any JSON value; the alternatives are tried in the order listed. */
value = false / null / true / object / array / number / string
|
|
|
|
/* Literal names, converted to the corresponding JavaScript values. */
false
  = "false" { return false; }

null
  = "null" { return null; }

true
  = "true" { return true; }
|
|
|
|
/* ----- 4. Objects ----- */
|
|
|
|
/*
 * An object: zero or more comma-separated members between curly brackets.
 * Returns a plain JavaScript object holding one own property per member; when
 * a name occurs more than once, the last occurrence wins.
 */
object
  = begin_object
    members:(
      head:member
      tail:(value_separator m:member { return m; })*
      {
        var result = {},
            entries = [head].concat(tail),
            i;

        /*
         * Define each member as an own data property instead of using plain
         * assignment. Assigning to the "__proto__" key would go through the
         * inherited setter, which either replaces the prototype of the result
         * or silently drops the member, so the parsed object would differ
         * from what JSON.parse produces for the same input.
         */
        for (i = 0; i < entries.length; i++) {
          Object.defineProperty(result, entries[i].name, {
            value: entries[i].value,
            writable: true,
            enumerable: true,
            configurable: true
          });
        }

        return result;
      }
    )?
    end_object
    { return members !== null ? members : {}; }
|
|
|
|
/* One member of an object, returned as a record with "name" and "value" fields. */
member
  = key:string name_separator val:value { return { name: key, value: val }; }
|
|
|
|
/* ----- 5. Arrays ----- */
|
|
|
|
/*
 * An array: zero or more comma-separated values between square brackets,
 * returned as a JavaScript array in source order.
 */
array
  = begin_array
    elements:(
      head:value
      tail:(value_separator element:value { return element; })*
      { return [head].concat(tail); }
    )?
    end_array
    { return elements === null ? [] : elements; }
|
|
|
|
/* ----- 6. Numbers ----- */
|
|
|
|
/* A number; the whole matched text is converted with parseFloat. */
number "number"
  = minus? int frac? exp? {
      var matched = text();

      return parseFloat(matched);
    }
|
|
|
|
/* Building blocks of the number rule. */
decimal_point = "."
digit1_9      = [1-9]
e             = [eE]
exp           = e (minus / plus)? DIGIT+
frac          = decimal_point DIGIT+
int           = zero / (digit1_9 DIGIT*)
minus         = "-"
plus          = "+"
zero          = "0"
|
|
|
|
/* ----- 7. Strings ----- */
|
|
|
|
/* A string: the decoded characters between two quotation marks, joined together. */
string "string"
  = quotation_mark pieces:char* quotation_mark { return pieces.join(""); }
|
|
|
|
/*
 * One character of a string: either a literal unescaped character or a
 * backslash escape, which is replaced by the character it denotes.
 */
char
  = unescaped
  / escape
    decoded:(
        '"'
      / "\\"
      / "/"
      / "b" { return "\b"; }
      / "f" { return "\f"; }
      / "n" { return "\n"; }
      / "r" { return "\r"; }
      / "t" { return "\t"; }
      / "u" hex:$(HEXDIG HEXDIG HEXDIG HEXDIG) {
          return String.fromCharCode(parseInt(hex, 16));
        }
    )
    { return decoded; }
|
|
|
|
/* Delimiters used by the string rules: the backslash and the double quote. */
escape         = "\\"
quotation_mark = '"'
|
|
/*
 * Any character that may appear in a string without being escaped, i.e.
 * everything except control characters, the quotation mark and the backslash.
 *
 * The upper bound is \uFFFF rather than the RFC's %x10FFFF: a "\u" escape
 * takes exactly four hex digits, so "\u10FFFF" would be read as \u10FF
 * followed by two literal "F" characters and the class would reject every
 * character above U+10FF. Input is matched one UTF-16 code unit at a time, so
 * characters beyond U+FFFF arrive as surrogate pairs, both halves of which
 * fall inside this range.
 */
unescaped = [\x20-\x21\x23-\x5B\x5D-\uFFFF]
|
|
|
|
/* ----- Core ABNF Rules ----- */
|
|
|
|
/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). */
|
|
/* A decimal digit, and a hexadecimal digit matched case-insensitively. */
DIGIT  = [0-9]
HEXDIG = [0-9a-f]i
|