diff --git a/examples/json.pegjs b/examples/json.pegjs index 0729c4a..946589e 100644 --- a/examples/json.pegjs +++ b/examples/json.pegjs @@ -1,119 +1,132 @@ -/* JSON parser based on the grammar described at http://json.org/. */ +/* + * JSON Grammar + * ============ + * + * Based on the grammar from RFC 7159 [1]. + * + * Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the + * JSON website [4] (somewhat informally). The RFC seems the most authoritative + * source, which is confirmed e.g. by [5]. + * + * [1] http://tools.ietf.org/html/rfc7159 + * [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm + * [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm + * [4] http://json.org/ + * [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON + */ -/* ===== Syntactical Elements ===== */ +/* ----- 2. JSON Grammar ----- */ -start - = _ object:object { return object; } +JSON_text + = ws value:value ws { return value; } -object - = "{" _ "}" _ { return {}; } - / "{" _ members:members "}" _ { return members; } - -members - = head:pair tail:("," _ pair)* { - var result = {}; - result[head[0]] = head[1]; - for (var i = 0; i < tail.length; i++) { - result[tail[i][2][0]] = tail[i][2][1]; - } - return result; - } +begin_array = ws "[" ws +begin_object = ws "{" ws +end_array = ws "]" ws +end_object = ws "}" ws +name_separator = ws ":" ws +value_separator = ws "," ws -pair - = name:string ":" _ value:value { return [name, value]; } +ws "whitespace" = [ \t\n\r]* -array - = "[" _ "]" _ { return []; } - / "[" _ elements:elements "]" _ { return elements; } - -elements - = head:value tail:("," _ value)* { - var result = [head]; - for (var i = 0; i < tail.length; i++) { - result.push(tail[i][2]); - } - return result; - } +/* ----- 3. Values ----- */ value - = string - / number + = false + / null + / true / object / array - / "true" _ { return true; } - / "false" _ { return false; } - / "null" _ { return null; } - -/* ===== Lexical Elements ===== */ - -string "string" - = '"' '"' _ { return ""; } - / '"' chars:chars '"' _ { return chars; } + / number + / string -chars - = chars:char+ { return chars.join(""); } +false = "false" { return false; } +null = "null" { return null; } +true = "true" { return true; } -char - // In the original JSON grammar: "any-Unicode-character-except-"-or-\-or-control-character" - = [^"\\\0-\x1F\x7f] - / '\\"' { return '"'; } - / "\\\\" { return "\\"; } - / "\\/" { return "/"; } - / "\\b" { return "\b"; } - / "\\f" { return "\f"; } - / "\\n" { return "\n"; } - / "\\r" { return "\r"; } - / "\\t" { return "\t"; } - / "\\u" digits:$(hexDigit hexDigit hexDigit hexDigit) { - return String.fromCharCode(parseInt(digits, 16)); - } +/* ----- 4. Objects ----- */ -number "number" - = parts:$(int frac exp) _ { return parseFloat(parts); } - / parts:$(int frac) _ { return parseFloat(parts); } - / parts:$(int exp) _ { return parseFloat(parts); } - / parts:$(int) _ { return parseFloat(parts); } +object + = begin_object + members:( + first:member + rest:(value_separator m:member { return m; })* + { + var result = {}, i; -int - = digit19 digits - / digit - / "-" digit19 digits - / "-" digit + result[first.name] = first.value; -frac - = "." digits + for (i = 0; i < rest.length; i++) { + result[rest[i].name] = rest[i].value; + } -exp - = e digits + return result; + } + )? + end_object + { return members !== null ? members: {}; } -digits - = digit+ +member + = name:string name_separator value:value { + return { name: name, value: value }; + } -e - = [eE] [+-]? +/* ----- 5. Arrays ----- */ -/* - * The following rules are not present in the original JSON gramar, but they are - * assumed to exist implicitly. - * - * FIXME: Define them according to ECMA-262, 5th ed. - */ +array + = begin_array + values:( + first:value + rest:(value_separator v:value { return v; })* + { return [first].concat(rest); } + )? + end_array + { return values !== null ? values : []; } -digit - = [0-9] +/* ----- 6. Numbers ----- */ -digit19 - = [1-9] +number "number" + = minus? int frac? exp? { return parseFloat(text()); } -hexDigit - = [0-9a-fA-F] +decimal_point = "." +digit1_9 = [1-9] +e = [eE] +exp = e (minus / plus)? DIGIT+ +frac = decimal_point DIGIT+ +int = zero / (digit1_9 DIGIT*) +minus = "-" +plus = "+" +zero = "0" -/* ===== Whitespace ===== */ +/* ----- 7. Strings ----- */ -_ "whitespace" - = whitespace* +string "string" + = quotation_mark chars:char* quotation_mark { return chars.join(""); } -// Whitespace is undefined in the original JSON grammar, so I assume a simple -// conventional definition consistent with ECMA-262, 5th ed. -whitespace - = [ \t\n\r] +char + = unescaped + / escape + sequence:( + '"' + / "\\" + / "/" + / "b" { return "\b"; } + / "f" { return "\f"; } + / "n" { return "\n"; } + / "r" { return "\r"; } + / "t" { return "\t"; } + / "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) { + return String.fromCharCode(parseInt(digits, 16)); + } + ) + { return sequence; } + +escape = "\\" +quotation_mark = '"' +unescaped = [\x20-\x21\x23-\x5B\x5D-\u10FFFF] + +/* ----- Core ABNF Rules ----- */ + +/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4627). */ +DIGIT = [0-9] +HEXDIG = [0-9a-f]i