Complete rewrite of the JSON example grammar

This is a complete rewrite of the JSON example grammar. It is now based on RFC 7159 instead of an informal description at the JSON website. Besides this, the rewrite reflects how I write grammars today (as opposed to a few years ago) and what style I would recommend to others.
11 years ago · fba70833dd
parent f5443d2bf1
commit fba70833dd
1 changed files with 108 additions and 95 deletions
--- a/examples/json.pegjs
+++ b/examples/json.pegjs
@@ -1,119 +1,132 @@
-/* JSON parser based on the grammar described at http://json.org/. */
+/*
+ * JSON Grammar
+ * ============
+ *
+ * Based on the grammar from RFC 7159 [1].
+ *
+ * Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
+ * JSON website [4] (somewhat informally). The RFC seems the most authoritative
+ * source, which is confirmed e.g. by [5].
+ *
+ * [1] http://tools.ietf.org/html/rfc7159
+ * [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm
+ * [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm
+ * [4] http://json.org/
+ * [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON
+ */

-/* ===== Syntactical Elements ===== */
+/* ----- 2. JSON Grammar ----- */

-start
-  = _ object:object { return object; }
+JSON_text
+  = ws value:value ws { return value; }

-object
-  = "{" _ "}" _                 { return {};      }
-  / "{" _ members:members "}" _ { return members; }
-
-members
-  = head:pair tail:("," _ pair)* {
-      var result = {};
-      result[head[0]] = head[1];
-      for (var i = 0; i < tail.length; i++) {
-        result[tail[i][2][0]] = tail[i][2][1];
-      }
-      return result;
-    }
+begin_array     = ws "[" ws // structural characters: each one accepts optional whitespace on both sides
+begin_object    = ws "{" ws
+end_array       = ws "]" ws
+end_object      = ws "}" ws
+name_separator  = ws ":" ws // separates a member's name from its value
+value_separator = ws "," ws // separates consecutive members or array elements

-pair
-  = name:string ":" _ value:value { return [name, value]; }
+ws "whitespace" = [ \t\n\r]*

-array
-  = "[" _ "]" _                   { return [];       }
-  / "[" _ elements:elements "]" _ { return elements; }
-
-elements
-  = head:value tail:("," _ value)* {
-      var result = [head];
-      for (var i = 0; i < tail.length; i++) {
-        result.push(tail[i][2]);
-      }
-      return result;
-    }
+/* ----- 3. Values ----- */

 value
-  = string
-  / number
+  = false
+  / null
+  / true
  / object
  / array
-  / "true" _  { return true;  }
-  / "false" _ { return false; }
-  / "null" _  { return null;  }
-
-/* ===== Lexical Elements ===== */
-
-string "string"
-  = '"' '"' _             { return "";    }
-  / '"' chars:chars '"' _ { return chars; }
+  / number
+  / string

-chars
-  = chars:char+ { return chars.join(""); }
+false = "false" { return false; }
+null  = "null"  { return null;  }
+true  = "true"  { return true;  }

-char
-  // In the original JSON grammar: "any-Unicode-character-except-"-or-\-or-control-character"
-  = [^"\\\0-\x1F\x7f]
-  / '\\"'  { return '"';  }
-  / "\\\\" { return "\\"; }
-  / "\\/"  { return "/";  }
-  / "\\b"  { return "\b"; }
-  / "\\f"  { return "\f"; }
-  / "\\n"  { return "\n"; }
-  / "\\r"  { return "\r"; }
-  / "\\t"  { return "\t"; }
-  / "\\u" digits:$(hexDigit hexDigit hexDigit hexDigit) {
-      return String.fromCharCode(parseInt(digits, 16));
-    }
+/* ----- 4. Objects ----- */

-number "number"
-  = parts:$(int frac exp) _ { return parseFloat(parts); }
-  / parts:$(int frac) _     { return parseFloat(parts); }
-  / parts:$(int exp) _      { return parseFloat(parts); }
-  / parts:$(int) _          { return parseFloat(parts); }
+object
+  = begin_object
+    members:(
+      first:member
+      rest:(value_separator m:member { return m; })*
+      {
+        var result = {}, i;

-int
-  = digit19 digits
-  / digit
-  / "-" digit19 digits
-  / "-" digit
+        result[first.name] = first.value;

-frac
-  = "." digits
+        for (i = 0; i < rest.length; i++) {
+          result[rest[i].name] = rest[i].value;
+        }

-exp
-  = e digits
+        return result;
+      }
+    )?
+    end_object
+    { return members !== null ? members: {}; }

-digits
-  = digit+
+member
+  = name:string name_separator value:value {
+      return { name: name, value: value };
+    }

-e
-  = [eE] [+-]?
+/* ----- 5. Arrays ----- */

-/*
- * The following rules are not present in the original JSON gramar, but they are
- * assumed to exist implicitly.
- *
- * FIXME: Define them according to ECMA-262, 5th ed.
- */
+array
+  = begin_array
+    values:(
+      first:value
+      rest:(value_separator v:value { return v; })*
+      { return [first].concat(rest); }
+    )?
+    end_array
+    { return values !== null ? values : []; }

-digit
-  = [0-9]
+/* ----- 6. Numbers ----- */

-digit19
-  = [1-9]
+number "number"
+  = minus? int frac? exp? { return parseFloat(text()); }

-hexDigit
-  = [0-9a-fA-F]
+decimal_point = "."
+digit1_9      = [1-9]
+e             = [eE]
+exp           = e (minus / plus)? DIGIT+ // exponent marker, optional sign, then at least one digit
+frac          = decimal_point DIGIT+ // at least one digit must follow the decimal point
+int           = zero / (digit1_9 DIGIT*) // either a lone "0" or a non-zero digit followed by any digits
+minus         = "-"
+plus          = "+"
+zero          = "0"

-/* ===== Whitespace ===== */
+/* ----- 7. Strings ----- */

-_ "whitespace"
-  = whitespace*
+string "string"
+  = quotation_mark chars:char* quotation_mark { return chars.join(""); }

-// Whitespace is undefined in the original JSON grammar, so I assume a simple
-// conventional definition consistent with ECMA-262, 5th ed.
-whitespace
-  = [ \t\n\r]
+char
+  = unescaped
+  / escape
+    sequence:(
+        '"'
+      / "\\"
+      / "/"
+      / "b" { return "\b"; }
+      / "f" { return "\f"; }
+      / "n" { return "\n"; }
+      / "r" { return "\r"; }
+      / "t" { return "\t"; }
+      / "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) {
+          return String.fromCharCode(parseInt(digits, 16));
+        }
+    )
+    { return sequence; }
+
+escape         = "\\"
+quotation_mark = '"'
+unescaped      = [^\0-\x1F\x22\x5C] // RFC 7159: %x20-21 / %x23-5B / %x5D-10FFFF; negated form because "\u" takes only 4 hex digits, so "\u10FFFF" would mean "\u10FF", "F", "F"
+
+/* ----- Core ABNF Rules ----- */
+
+/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). */
+DIGIT  = [0-9] // one decimal digit
+HEXDIG = [0-9a-f]i // one hexadecimal digit; the "i" flag makes the class case-insensitive