Complete rewrite of the JSON example grammar

This is a complete rewrite of the JSON example grammar. It is now based on RFC 7159 instead of an informal description on the JSON website. Besides this, the rewrite reflects how I write grammars today (as opposed to a few years ago) and what style I would recommend to others.
10 years ago · fba70833dd
parent f5443d2bf1
commit fba70833dd
1 changed files with 108 additions and 95 deletions
--- a/examples/json.pegjs
+++ b/examples/json.pegjs
@ -1,119 +1,132 @@
-/* JSON parser based on the grammar described at http://json.org/. */
+/*
 * JSON Grammar
 * ============
 *
 * Based on the grammar from RFC 7159 [1].
 *
 * Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
 * JSON website [4] (somewhat informally). The RFC seems the most authoritative
 * source, which is confirmed e.g. by [5].
 *
 * [1] http://tools.ietf.org/html/rfc7159
 * [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm
 * [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm
 * [4] http://json.org/
 * [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON
 */
-/* ===== Syntactical Elements ===== */
+/* ----- 2. JSON Grammar ----- */
-start
+JSON_text
-  = _ object:object { return object; }
+  = ws value:value ws { return value; }
-object
+begin_array     = ws "[" ws
-  = "{" _ "}" _                 { return {};      }
+begin_object    = ws "{" ws
-  / "{" _ members:members "}" _ { return members; }
+end_array       = ws "]" ws
-
+end_object      = ws "}" ws
-members
+name_separator  = ws ":" ws
-  = head:pair tail:("," _ pair)* {
+value_separator = ws "," ws
      var result = {};
      result[head[0]] = head[1];
      for (var i = 0; i < tail.length; i++) {
        result[tail[i][2][0]] = tail[i][2][1];
      }
      return result;
    }
-pair
+ws "whitespace" = [ \t\n\r]*
  = name:string ":" _ value:value { return [name, value]; }
-array
+/* ----- 3. Values ----- */
  = "[" _ "]" _                   { return [];       }
  / "[" _ elements:elements "]" _ { return elements; }
 elements
  = head:value tail:("," _ value)* {
      var result = [head];
      for (var i = 0; i < tail.length; i++) {
        result.push(tail[i][2]);
      }
      return result;
    }
 value
-  = string
+  = false
-  / number
+  / null
  / true
  / object
  / array
-  / "true" _  { return true;  }
+  / number
-  / "false" _ { return false; }
+  / string
  / "null" _  { return null;  }
-/* ===== Lexical Elements ===== */
+false = "false" { return false; }
 null  = "null"  { return null;  }
 true  = "true"  { return true;  }
-string "string"
+/* ----- 4. Objects ----- */
  = '"' '"' _             { return "";    }
  / '"' chars:chars '"' _ { return chars; }
-chars
+object
-  = chars:char+ { return chars.join(""); }
+  = begin_object
    members:(
      first:member
      rest:(value_separator m:member { return m; })*
      {
        var result = {}, i;
-char
+        result[first.name] = first.value;
-  // In the original JSON grammar: "any-Unicode-character-except-"-or-\-or-control-character"
+
-  = [^"\\\0-\x1F\x7f]
+        for (i = 0; i < rest.length; i++) {
-  / '\\"'  { return '"';  }
+          result[rest[i].name] = rest[i].value;
  / "\\\\" { return "\\"; }
  / "\\/"  { return "/";  }
  / "\\b"  { return "\b"; }
  / "\\f"  { return "\f"; }
  / "\\n"  { return "\n"; }
  / "\\r"  { return "\r"; }
  / "\\t"  { return "\t"; }
  / "\\u" digits:$(hexDigit hexDigit hexDigit hexDigit) {
      return String.fromCharCode(parseInt(digits, 16));
        }
-number "number"
+        return result;
-  = parts:$(int frac exp) _ { return parseFloat(parts); }
+      }
-  / parts:$(int frac) _     { return parseFloat(parts); }
+    )?
-  / parts:$(int exp) _      { return parseFloat(parts); }
+    end_object
-  / parts:$(int) _          { return parseFloat(parts); }
+    { return members !== null ? members: {}; }
-int
+member
-  = digit19 digits
+  = name:string name_separator value:value {
-  / digit
+      return { name: name, value: value };
-  / "-" digit19 digits
+    }
  / "-" digit
-frac
+/* ----- 5. Arrays ----- */
  = "." digits
-exp
+array
-  = e digits
+  = begin_array
    values:(
      first:value
      rest:(value_separator v:value { return v; })*
      { return [first].concat(rest); }
    )?
    end_array
    { return values !== null ? values : []; }
-digits
+/* ----- 6. Numbers ----- */
  = digit+
-e
+number "number"
-  = [eE] [+-]?
+  = minus? int frac? exp? { return parseFloat(text()); }
-/*
+decimal_point = "."
- * The following rules are not present in the original JSON gramar, but they are
+digit1_9      = [1-9]
- * assumed to exist implicitly.
+e             = [eE]
- *
+exp           = e (minus / plus)? DIGIT+
- * FIXME: Define them according to ECMA-262, 5th ed.
+frac          = decimal_point DIGIT+
- */
+int           = zero / (digit1_9 DIGIT*)
 minus         = "-"
 plus          = "+"
 zero          = "0"
-digit
+/* ----- 7. Strings ----- */
  = [0-9]
-digit19
+string "string"
-  = [1-9]
+  = quotation_mark chars:char* quotation_mark { return chars.join(""); }
-hexDigit
+char
-  = [0-9a-fA-F]
+  = unescaped
  / escape
    sequence:(
        '"'
      / "\\"
      / "/"
      / "b" { return "\b"; }
      / "f" { return "\f"; }
      / "n" { return "\n"; }
      / "r" { return "\r"; }
      / "t" { return "\t"; }
      / "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) {
          return String.fromCharCode(parseInt(digits, 16));
        }
    )
    { return sequence; }
-/* ===== Whitespace ===== */
+escape         = "\\"
 quotation_mark = '"'
 /*
  * Any UTF-16 code unit that RFC 7159 allows unescaped inside a string:
  * everything except control characters (U+0000-U+001F), '"' and "\".
  *
  * A negated class is used because PEG.js reads "\u" escapes as exactly four
  * hex digits: an upper bound written as "\u10FFFF" means "\u10FF" followed by
  * two literal "F" characters, which rejects every character above U+10FF.
  * Characters outside the BMP arrive as surrogate pairs and are matched one
  * code unit at a time.
  */
 unescaped      = [^\0-\x1F\x22\x5C]
-_ "whitespace"
+/* ----- Core ABNF Rules ----- */
  = whitespace*
-// Whitespace is undefined in the original JSON grammar, so I assume a simple
+/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). */
-// conventional definition consistent with ECMA-262, 5th ed.
+DIGIT  = [0-9]
-whitespace
+HEXDIG = [0-9a-f]i
  = [ \t\n\r]