f07ab7f32e
The "unescaped" rule was created by mechanically translating the original RFC 7159 rule: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF into: unescaped = [\x20-\x21\x23-\x5B\x5D-\u10FFFF]. However, this mechanical translation was incorrect because PEG.js grammars don't have 6-digit Unicode escape sequences. The sequence "\u10FFFF" was interpreted as "\u10FF" followed by two "F" characters. This commit rewrites the "unescaped" rule into a form which, while not being a mechanical translation of the original rule, matches the same characters in the whole Unicode range. It also matches the textual description of string representation in RFC 7159: All Unicode characters may be placed within the quotation marks, except for the characters that must be escaped: quotation mark, reverse solidus, and the control characters (U+0000 through U+001F). Fixes #417.
133 lines
2.8 KiB
JavaScript
133 lines
2.8 KiB
JavaScript
/*
|
|
* JSON Grammar
|
|
* ============
|
|
*
|
|
* Based on the grammar from RFC 7159 [1].
|
|
*
|
|
* Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
|
|
* JSON website [4] (somewhat informally). The RFC seems the most authoritative
|
|
* source, which is confirmed e.g. by [5].
|
|
*
|
|
* [1] http://tools.ietf.org/html/rfc7159
|
|
* [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
|
* [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm
|
|
* [4] http://json.org/
|
|
* [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON
|
|
*/
|
|
|
|
/* ----- 2. JSON Grammar ----- */
|
|
|
|
/*
 * A complete JSON text: exactly one value, optionally surrounded by
 * whitespace. The parse result is the value itself. This is the first rule in
 * the grammar, which PEG.js uses as the start rule unless told otherwise.
 */
JSON_text
  = ws parsed:value ws { return parsed; }
|
|
|
|
/*
 * Structural characters. Each one consumes any whitespace on both sides, so
 * the rules that use them never have to mention `ws` themselves.
 */
begin_array     = ws '[' ws
begin_object    = ws '{' ws
end_array       = ws ']' ws
end_object      = ws '}' ws
name_separator  = ws ':' ws
value_separator = ws ',' ws
|
|
|
|
/*
 * Insignificant whitespace: zero or more space, tab, line feed, or carriage
 * return characters. Reported as "whitespace" in error messages.
 */
ws "whitespace" = [ \t\n\r]*
|
|
|
|
/* ----- 3. Values ----- */
|
|
|
|
/*
 * Any JSON value. The alternatives are tried in the order written: the three
 * literal names first, then the two container types, then the scalars.
 */
value
  = false / null / true
  / object / array
  / number / string
|
|
|
|
/* Literal names; each one yields the corresponding JavaScript value. */
false = 'false' { return false; }
null  = 'null'  { return null;  }
true  = 'true'  { return true;  }
|
|
|
|
/* ----- 4. Objects ----- */
|
|
|
|
/*
 * A JSON object: zero or more comma-separated members between braces.
 *
 * Yields a plain JavaScript object holding one own, enumerable property per
 * member. When a name occurs more than once, the last occurrence wins. An
 * empty object literal yields an empty object.
 */
object
  = begin_object
    members:(
      head:member
      tail:(value_separator m:member { return m; })*
      {
        var result = {};

        [head].concat(tail).forEach(function(pair) {
          /*
           * Define the property explicitly instead of assigning to it. A plain
           * assignment to the key "__proto__" would invoke the setter
           * inherited from Object.prototype, so the member would either be
           * dropped or become the prototype of the result instead of being
           * stored as an ordinary own property.
           */
          Object.defineProperty(result, pair.name, {
            value:        pair.value,
            writable:     true,
            enumerable:   true,
            configurable: true
          });
        });

        return result;
      }
    )?
    end_object
    { return members !== null ? members : {}; }
|
|
|
|
/*
 * One object member: a string name, the name separator, and a value.
 * Yields a `{ name, value }` pair that the `object` rule consumes.
 */
member
  = key:string name_separator val:value {
      return { name: key, value: val };
    }
|
|
|
|
/* ----- 5. Arrays ----- */
|
|
|
|
/*
 * A JSON array: zero or more comma-separated values between brackets.
 * Yields a JavaScript array of the parsed values in source order; an empty
 * array literal yields an empty array.
 */
array
  = begin_array
    items:(
      first:value
      rest:(value_separator item:value { return item; })*
      { return [first].concat(rest); }
    )?
    end_array
    { return items === null ? [] : items; }
|
|
|
|
/* ----- 6. Numbers ----- */
|
|
|
|
/*
 * A JSON number: optional minus sign, integer part, optional fraction, and
 * optional exponent. The matched text is converted with parseFloat. Reported
 * as "number" in error messages.
 */
number "number"
  = minus? int frac? exp?
    { return parseFloat(text()); }

/* Lexical pieces of a number. */
decimal_point = '.'
digit1_9      = [1-9]
e             = [eE]
exp           = e (minus / plus)? DIGIT+
frac          = decimal_point DIGIT+
int           = zero / digit1_9 DIGIT*
minus         = '-'
plus          = '+'
zero          = '0'
|
|
|
|
/* ----- 7. Strings ----- */
|
|
|
|
/*
 * A JSON string: any number of characters between quotation marks. Yields the
 * decoded characters concatenated into one JavaScript string. Reported as
 * "string" in error messages.
 */
string "string"
  = quotation_mark pieces:char* quotation_mark
    { return pieces.join(""); }
|
|
|
|
/*
 * One character of a string: either a character that needs no escaping, or a
 * backslash escape. Yields the character the source text stands for.
 *
 * A \uXXXX escape yields the single UTF-16 code unit with that value.
 */
char
  = unescaped
  / escape
    decoded:(
        '"'
      / "\\"
      / "/"
      / "b" { return "\b"; }
      / "f" { return "\f"; }
      / "n" { return "\n"; }
      / "r" { return "\r"; }
      / "t" { return "\t"; }
      / "u" hex:$(HEXDIG HEXDIG HEXDIG HEXDIG) {
          var codeUnit = parseInt(hex, 16);

          return String.fromCharCode(codeUnit);
        }
    )
    { return decoded; }
|
|
|
|
/* Lexical pieces of a string. */
escape         = '\\'
quotation_mark = '"'

/*
 * Any character that may appear in a string without escaping: everything
 * except the control characters U+0000 through U+001F, the quotation mark
 * (\x22), and the reverse solidus (\x5C). It is written as a negated class
 * because PEG.js has no 6-digit Unicode escape with which to spell the upper
 * bound U+10FFFF.
 */
unescaped      = [^\0-\x1F\x22\x5C]
|
|
|
|
/* ----- Core ABNF Rules ----- */
|
|
|
|
/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). */
|
|
/* One ASCII decimal digit, "0" through "9". */
DIGIT = [0-9]
|
|
/* One hexadecimal digit; the trailing `i` makes the class case-insensitive. */
HEXDIG = [0-9a-f]i
|