pegjs/lib/metagrammar.pegjs

/*
 * Start rule: optional leading whitespace/comments followed by one or more
 * rules. Produces an object whose keys are rule names and whose values are the
 * corresponding rule nodes.
 */
grammar
  = __ rule+ {
      /* $2 is the list of nodes produced by "rule+". */
      var rulesByName = {};
      PEG.ArrayUtils.each($2, function(parsedRule) {
        rulesByName[parsedRule.name] = parsedRule;
      });
      return rulesByName;
    }

/*
 * A single rule definition: its name, an optional display name written as a
 * literal, "=", and the defining expression.
 */
rule
  = identifier (literal / "") equals expression {
      /* $2 is "" when no display-name literal was written. */
      var displayName = null;
      if ($2 !== "") {
        displayName = $2;
      }

      return {
        type:        "rule",
        name:        $1,
        displayName: displayName,
        expression:  $4
      };
    }

/*
 * Entry point for any parsing expression. Currently just an alias for
 * "choice".
 */
expression
  = choice

/*
 * One or more sequences separated by "/". A single sequence is returned as is;
 * several are wrapped in a "choice" node listing them in source order.
 */
choice
  = sequence (slash sequence)* {
      /* No "/" followed the first sequence, so no wrapper node is needed. */
      if ($2.length === 0) {
        return $1;
      }

      /* Each element of $2 is a [slash, sequence] pair; keep the sequence. */
      var rest = PEG.ArrayUtils.map($2, function(pair) { return pair[1]; });
      return {
        type:         "choice",
        alternatives: [$1].concat(rest)
      };
    }

/*
 * Zero or more prefixed expressions, optionally followed by an action.
 *
 * Exactly one element is returned as is; zero or several elements are wrapped
 * in a "sequence" node. When an action is present, that result is wrapped once
 * more in an "action" node that carries the action's source text.
 */
sequence
  = prefixed* action {
      /* Strict comparison: the length is always a number. */
      var expression = $1.length !== 1
        ? {
            type:     "sequence",
            elements: $1
          }
        : $1[0];
      return {
        type:       "action",
        expression: expression,
        action:     $2
      };
    }
  / prefixed* {
      /* Same unwrapping as above, just without the "action" node. */
      return $1.length !== 1
        ? {
            type:     "sequence",
            elements: $1
          }
        : $1[0];
    }

/*
 * A suffixed expression, optionally preceded by "&" or "!". A prefix wraps the
 * expression ($2) in an "and_predicate" or "not_predicate" node; the last
 * alternative has no action and no prefix.
 */
prefixed
  = and suffixed { return { type: "and_predicate", expression: $2 }; }
  / not suffixed { return { type: "not_predicate", expression: $2 }; }
  / suffixed

/*
 * A primary expression, optionally followed by "?", "*" or "+". A suffix wraps
 * the primary ($1) in an "optional", "zero_or_more" or "one_or_more" node; the
 * last alternative has no action and no suffix.
 *
 * Every alternative starts with "primary", so it is attempted again for each
 * alternative that is tried.
 */
suffixed
  = primary question { return { type: "optional",     expression: $1}; }
  / primary star     { return { type: "zero_or_more", expression: $1}; }
  / primary plus     { return { type: "one_or_more",  expression: $1}; }
  / primary

/*
 * The atoms of the expression syntax: a rule reference, a literal, the "any
 * character" dot, a character class, or a parenthesized expression.
 *
 * The negative lookahead on the first alternative rejects an identifier that
 * is followed by an optional literal and "=". That is the shape of a rule
 * header (see "rule"), so such an identifier begins the next rule definition
 * rather than referencing a rule.
 */
primary
  = identifier !(( literal / "") equals) { return { type: "rule_ref", name:  $1 }; }
  / literal                              { return { type: "literal",  value: $1 }; }
  / dot                                  { return { type: "any"                 }; }
  / class
  / lparen expression rparen             { return $2; }

/* "Lexical" elements */

/*
 * An action: a brace-delimited block of code followed by optional whitespace
 * and comments. Returns the code without the outermost pair of braces.
 */
action "action"
  = braced __ { return $1.slice(1, -1); }

/*
 * A brace-delimited block whose inner braces are balanced. Returns the whole
 * matched text, including the delimiting braces.
 *
 * Runs of non-brace characters are matched through "nonBraceCharacters" so the
 * block is assembled from a few strings instead of one array element per
 * character. That rule never matches the empty string, so the repetition
 * always makes progress.
 */
braced
  = "{" (braced / nonBraceCharacters)* "}" { return $1 + $2.join("") + $3; }

/*
 * A run of one or more characters that are not braces, returned joined into a
 * single string.
 */
nonBraceCharacters
  = nonBraceCharacter+ { return $1.join(""); }

/* Any single character other than an opening or closing brace. */
nonBraceCharacter
  = [^{}]

/*
 * Punctuation tokens. Each one matches a single character, skips any
 * whitespace, line ends and comments that follow it, and returns the matched
 * character.
 */
equals   = "=" __ { return $1; }
slash    = "/" __ { return $1; }
and      = "&" __ { return $1; }
not      = "!" __ { return $1; }
question = "?" __ { return $1; }
star     = "*" __ { return $1; }
plus     = "+" __ { return $1; }
lparen   = "(" __ { return $1; }
rparen   = ")" __ { return $1; }
dot      = "." __ { return $1; }

/*
 * Modelled after ECMA-262, 5th ed., 7.6, but much simplified:
 *
 * * no Unicode escape sequences
 *
 * * "Unicode combining marks" and "Unicode connection punctuation" can't be
 *   part of the identifier
 *
 * * only [a-zA-Z] is considered a "Unicode letter"
 *
 * * only [0-9] is considered a "Unicode digit"
 *
 * The simplifications were made just to make the implementation a little bit
 * easier; there is no "philosophical" reason behind them.
 *
 * Whitespace and comments after the identifier are consumed but are not part
 * of the returned name.
 */
identifier "identifier"
  = (letter / "_" / "$") (letter / digit / "_" / "$")* __ {
      return $1 + $2.join("");
    }

/*
 * Modelled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
 * vaguely).
 *
 * A double- or single-quoted string followed by optional whitespace and
 * comments. Returns the value produced by the quoted-literal rule that
 * matched.
 */
literal "literal"
  = (doubleQuotedLiteral / singleQuotedLiteral) __ { return $1; }

/*
 * A string delimited by double quotes. Returns the values of the characters
 * between the quotes, joined into one string; the quotes themselves are
 * dropped.
 */
doubleQuotedLiteral
  = '"' doubleQuotedCharacter* '"' { return $2.join(""); }

/*
 * One element of a double-quoted literal: either a plain character or one of
 * the supported escape sequences.
 */
doubleQuotedCharacter
  = simpleDoubleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

/*
 * Any single character except a double quote, a backslash or a line
 * terminator character. $1 is the lookahead's result, so the character itself
 * is $2.
 */
simpleDoubleQuotedCharacter
  = !('"' / "\\" / eolChar) . { return $2; }

/*
 * A string delimited by single quotes. Returns the values of the characters
 * between the quotes, joined into one string; the quotes themselves are
 * dropped.
 */
singleQuotedLiteral
  = "'" singleQuotedCharacter* "'" { return $2.join(""); }

/*
 * One element of a single-quoted literal: either a plain character or one of
 * the supported escape sequences.
 */
singleQuotedCharacter
  = simpleSingleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

/*
 * Any single character except a single quote, a backslash or a line
 * terminator character. $1 is the lookahead's result, so the character itself
 * is $2.
 */
simpleSingleQuotedCharacter
  = !("'" / "\\" / eolChar) . { return $2; }

/*
 * A character class such as [a-z0-9_] or [^"], followed by optional whitespace
 * and comments.
 *
 * Returns a "class" node with:
 *
 * * inverted: true when the class starts with "^"
 *
 * * parts: the "data" of every range or character, in source order
 *
 * * rawText: the class text rebuilt from the "rawText" of its parts
 */
class "character class"
  = "[" "^"? (classCharacterRange / classCharacter)* "]" __ {
      /* Declared with var so they do not leak as implicit globals. */
      var parts = PEG.ArrayUtils.map($3, function(part) { return part.data; });
      var rawText = "["
        + $2
        + PEG.ArrayUtils.map($3, function(part) {
            return part.rawText;
          }).join("")
        + "]";

      return {
        type:     "class",
        inverted: $2 === "^",
        parts:    parts,
        // FIXME: Get the raw text from the input directly.
        rawText:  rawText
      };
    }

/*
 * A range inside a character class, e.g. a-z. Returns the two end points as a
 * pair in "data" together with the rebuilt source text in "rawText". A range
 * whose start has a higher character code than its end is rejected with a
 * syntax error.
 */
classCharacterRange
  = classCharacter "-" classCharacter {
      var startCode = $1.data.charCodeAt(0);
      var endCode = $3.data.charCodeAt(0);
      // FIXME: Get the raw text from the input directly.
      var rangeText = $1.rawText + "-" + $3.rawText;

      if (startCode > endCode) {
        throw new this.SyntaxError(
          "Invalid character range: " + rangeText + "."
        );
      }

      return {
        data:    [$1.data, $3.data],
        rawText: rangeText
      };
    }

/*
 * A single character inside a character class. Returns the character in
 * "data" and its quoted form, as produced by PEG.RegExpUtils.quoteForClass, in
 * "rawText".
 */
classCharacter
  = bracketDelimitedCharacter {
      var character = $1;
      // FIXME: Get the raw text from the input directly.
      var quoted = PEG.RegExpUtils.quoteForClass(character);

      return { data: character, rawText: quoted };
    }

/*
 * One element of a character class body: either a plain character or one of
 * the supported escape sequences.
 */
bracketDelimitedCharacter
  = simpleBracketDelimitedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

/*
 * Any single character except a closing bracket, a backslash or a line
 * terminator character. $1 is the lookahead's result, so the character itself
 * is $2.
 */
simpleBracketDelimitedCharacter
  = !("]" / "\\" / eolChar) . { return $2; }

/*
 * A backslash followed by one character that is not a digit, "x", "u" or a
 * line terminator character. The letters b, f, n, r, t and v yield the
 * matching control character; any other character yields itself.
 */
simpleEscapeSequence
  = "\\" !(digit / "x" / "u" / eolChar) . {
      /* $3 is the escaped character; $2 is the lookahead's result. */
      switch ($3) {
        case "b": return "\b";
        case "f": return "\f";
        case "n": return "\n";
        case "r": return "\r";
        case "t": return "\t";
        case "v": return "\x0B"; // IE does not recognize "\v".
        default:  return $3;
      }
    }

/*
 * The escape sequence \0, provided no digit follows it. Returns the character
 * with code 0.
 */
zeroEscapeSequence
  = "\\0" !digit { return "\0"; }

/*
 * The escape sequence \xHH with exactly two hexadecimal digits. Returns the
 * character with that code.
 */
hexEscapeSequence
  = "\\x" hexDigit hexDigit {
      /* Explicit radix rather than relying on "0x" prefix detection. */
      return String.fromCharCode(parseInt($2 + $3, 16));
    }

/*
 * The escape sequence \uHHHH with exactly four hexadecimal digits. Returns the
 * character with that code.
 */
unicodeEscapeSequence
  = "\\u" hexDigit hexDigit hexDigit hexDigit {
      /* Explicit radix rather than relying on "0x" prefix detection. */
      return String.fromCharCode(parseInt($2 + $3 + $4 + $5, 16));
    }

/*
 * A backslash immediately followed by a line end. Returns the line end text
 * itself ($2), so the line break stays in the resulting value.
 */
eolEscapeSequence
  = "\\" eol { return $2; }

/* A single decimal digit. */
digit
  = [0-9]

/* A single hexadecimal digit, in either letter case. */
hexDigit
  = [0-9a-fA-F]

/* A single letter from a-z or A-Z. */
letter
  = lowerCaseLetter
  / upperCaseLetter

/* A single letter from a-z. */
lowerCaseLetter
  = [a-z]

/* A single letter from A-Z. */
upperCaseLetter
  = [A-Z]

/*
 * Skips any amount (possibly none) of whitespace, line ends and comments.
 * Token-level rules append it so that callers never deal with spacing.
 */
__ = (whitespace / eol / comment)*

/*
 * Modelled after ECMA-262, 5th ed., 7.4.
 *
 * Either kind of comment: single-line or multi-line.
 */
comment "comment"
  = singleLineComment
  / multiLineComment

/*
 * Two slashes followed by everything up to, but not including, the next line
 * terminator character.
 */
singleLineComment
  = "//" (!eolChar .)*

// A block comment: the opening delimiter, then any characters up to the first
// closing delimiter. Because the first closing delimiter ends the match, such
// comments do not nest.
multiLineComment
  = "/*" (!"*/" .)* "*/"

/*
 * Modelled after ECMA-262, 5th ed., 7.3.
 *
 * A single line end. "\r\n" is listed before "\r" so that a CR LF pair is
 * matched as one line end instead of a lone CR.
 */
eol "end of line"
  = "\n"
  / "\r\n"
  / "\r"
  / "\u2028"
  / "\u2029"

/*
 * A single line terminator character. Unlike "eol", this always matches
 * exactly one character, which suits the negative lookaheads that use it.
 */
eolChar
  = [\n\r\u2028\u2029]

/*
 * Modelled after ECMA-262, 5th ed., 7.2.
 *
 * A single whitespace character: space, tab, vertical tab, form feed,
 * no-break space (U+00A0), U+FEFF, or one of the other Unicode space
 * characters listed in the class. Line terminators are handled by "eol".
 */
whitespace "whitespace"
  = [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]