pegjs/lib/metagrammar.pegjs

/*
 * Start rule: optional leading whitespace/comments followed by one or more
 * rule definitions. Produces an object mapping each rule name to its node.
 */
grammar = __ rule+ {
  // Rules sharing a name overwrite one another in the map.
  var rulesByName = {};
  PEG.ArrayUtils.each($2, function(parsedRule) {
    rulesByName[parsedRule.name] = parsedRule;
  });
  return rulesByName;
}

/*
 * A single rule definition: name, optional display-name literal, "=" and the
 * rule body. An absent (or empty) display name is stored as null.
 */
rule = identifier (literal / "") equals expression {
  var shownName = null;
  if ($2 !== "") {
    shownName = $2;
  }

  return {
    type:        "rule",
    name:        $1,
    displayName: shownName,
    expression:  $4
  };
}

/* Entry point for a rule body; currently just an alias for choice. */
expression = choice

/*
 * One or more sequences separated by "/". A single sequence is returned as
 * is; several are wrapped in a "choice" node listing them in source order.
 */
choice = sequence (slash sequence)* {
  // No "/" seen: pass the lone sequence through without a wrapper node.
  if ($2.length === 0) {
    return $1;
  }

  // Each repetition is a [slash, sequence] pair; keep only the sequence.
  var followingAlternatives = PEG.ArrayUtils.map($2, function(pair) {
    return pair[1];
  });

  return {
    type:         "choice",
    alternatives: [$1].concat(followingAlternatives)
  };
}

/*
 * Zero or more prefixed expressions, optionally followed by an action.
 * Exactly one element is returned unwrapped; any other count (including
 * zero) becomes a "sequence" node. With an action present, the result is
 * wrapped in an "action" node carrying the action code.
 */
sequence
  = prefixed* action {
      var body;
      if ($1.length === 1) {
        body = $1[0];
      } else {
        body = { type: "sequence", elements: $1 };
      }

      return {
        type:       "action",
        expression: body,
        action:     $2
      };
    }
  / prefixed* {
      if ($1.length === 1) {
        return $1[0];
      }

      return {
        type:     "sequence",
        elements: $1
      };
    }

/*
 * A suffixed expression optionally preceded by a predicate operator: "&"
 * yields an "and_predicate" node, "!" a "not_predicate" node; without an
 * operator the suffixed expression is returned unchanged.
 */
prefixed
  = and suffixed { return { type: "and_predicate", expression: $2 }; }
  / not suffixed { return { type: "not_predicate", expression: $2 }; }
  / suffixed

/*
 * A primary expression optionally followed by a repetition operator: "?"
 * yields "optional", "*" yields "zero_or_more", "+" yields "one_or_more";
 * without an operator the primary expression is returned unchanged.
 */
suffixed
  = primary question { return { type: "optional",     expression: $1}; }
  / primary star     { return { type: "zero_or_more", expression: $1}; }
  / primary plus     { return { type: "one_or_more",  expression: $1}; }
  / primary

/*
 * The atoms of an expression: rule reference, literal, "." (any character),
 * character class, or a parenthesized expression.
 *
 * The negative lookahead on the first alternative rejects an identifier that
 * is followed by an optional literal and "=", i.e. one that starts the next
 * rule definition rather than referring to a rule.
 */
primary
  = identifier !(( literal / "") equals) { return { type: "rule_ref", name:  $1 }; }
  / literal                              { return { type: "literal",  value: $1 }; }
  / dot                                  { return { type: "any"                 }; }
  / class
  / lparen expression rparen             { return $2; }

/* "Lexical" elements */

/* An action: a braced code block, returned without its outermost braces. */
action "action" = braced __ {
  // The matched text always starts and ends with a brace character.
  return $1.substring(1, $1.length - 1);
}

/*
 * A balanced, possibly nested, brace-delimited block. Returns the full
 * matched text, including the delimiting braces.
 */
braced = "{" (braced / nonBraceCharacter)* "}" {
  var inner = $2.join("");
  return [$1, inner, $3].join("");
}

/*
 * A run of one or more non-brace characters, returned as a single string.
 * NOTE(review): no rule visible in this file references this one.
 */
nonBraceCharacters = nonBraceCharacter+ { return $1.join(""); }

/*
 * Any single character other than an opening or closing curly brace.
 * Uses "=" as the rule separator, like every other rule in this grammar.
 */
nonBraceCharacter = [^{}]

/*
 * Punctuation tokens. Each matches its literal character, then skips any
 * trailing whitespace, line ends and comments (via __), and returns the
 * literal text.
 */
equals   = "=" __ { return $1; }
slash    = "/" __ { return $1; }
and      = "&" __ { return $1; }
not      = "!" __ { return $1; }
question = "?" __ { return $1; }
star     = "*" __ { return $1; }
plus     = "+" __ { return $1; }
lparen   = "(" __ { return $1; }
rparen   = ")" __ { return $1; }
dot      = "." __ { return $1; }

/*
 * Modelled after ECMA-262, 5th ed., 7.6, but much simplified:
 *
 * * no Unicode escape sequences
 *
 * * "Unicode combining marks" and "Unicode connection punctuation" can't be
 *   part of the identifier
 *
 * * only [a-zA-Z] is considered a "Unicode letter"
 *
 * * only [0-9] is considered a "Unicode digit"
 *
 * The simplifications were made just to make the implementation a little bit
 * easier; there is no "philosophical" reason behind them.
 */
identifier "identifier" = (letter / "_" / "$") (letter / digit / "_" / "$")* __ {
  // First character plus the remaining characters; trailing whitespace and
  // comments matched by __ are not part of the result.
  var firstChar = $1;
  var remainder = $2.join("");
  return firstChar + remainder;
}

/*
 * Modelled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
 * vaguely).
 */
/*
 * A double- or single-quoted string followed by optional whitespace and
 * comments. Returns the string's value as produced by the quoted-literal
 * rule, without the quotes.
 */
literal "literal" = (doubleQuotedLiteral / singleQuotedLiteral) __ { return $1; }

/* A double-quoted string; returns its characters joined, without the quotes. */
doubleQuotedLiteral = '"' doubleQuotedCharacter* '"' { return $2.join(""); }

/*
 * One character of a double-quoted string: either a plain character or one
 * of the supported escape sequences.
 */
doubleQuotedCharacter
  = simpleDoubleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

/* Any character except a double quote, a backslash or a line-end character. */
simpleDoubleQuotedCharacter = !('"' / "\\" / eolChar) . { return $2; }

/* A single-quoted string; returns its characters joined, without the quotes. */
singleQuotedLiteral = "'" singleQuotedCharacter* "'" { return $2.join(""); }

/*
 * One character of a single-quoted string: either a plain character or one
 * of the supported escape sequences.
 */
singleQuotedCharacter
  = simpleSingleQuotedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

/* Any character except a single quote, a backslash or a line-end character. */
simpleSingleQuotedCharacter = !("'" / "\\" / eolChar) . { return $2; }

/*
 * A bracketed character class, optionally inverted with a leading "^".
 * Produces a "class" node holding the inversion flag, the list of parts
 * (single characters and two-element ranges) and a reconstructed raw text.
 */
class "character class" = "[" "^"? (classCharacterRange / classCharacter)* "]" __ {
  // Both locals are declared with "var" so that running this action does not
  // create or overwrite global variables.
  var parts = PEG.ArrayUtils.map($3, function(part) { return part.data; });

  // NOTE(review): the concatenation assumes an unmatched "^"? yields an empty
  // string; confirm against the generator.
  var rawText = "["
    + $2
    + PEG.ArrayUtils.map($3, function(part) { return part.rawText; }).join("")
    + "]";

  return {
    type:     "class",
    inverted: $2 === "^",
    parts:    parts,
    // FIXME: Get the raw text from the input directly.
    rawText:  rawText
  };
}

/*
 * A range such as a-z inside a character class. Throws a SyntaxError when
 * the first character's code is greater than the last character's code.
 */
classCharacterRange = classCharacter "-" classCharacter {
  var first = $1;
  var last  = $3;
  // FIXME: Get the raw text from the input directly.
  var rangeText = first.rawText + "-" + last.rawText;

  if (first.data.charCodeAt(0) > last.data.charCodeAt(0)) {
    throw new this.SyntaxError("Invalid character range: " + rangeText + ".");
  }

  return {
    data:    [first.data, last.data],
    rawText: rangeText
  };
}

/*
 * A single character inside a character class, returned together with a
 * raw-text form produced by PEG.RegExpUtils.quoteForClass.
 */
classCharacter = bracketDelimitedCharacter {
  var character = $1;
  // FIXME: Get the raw text from the input directly.
  var quoted = PEG.RegExpUtils.quoteForClass(character);

  return {
    data:    character,
    rawText: quoted
  };
}

/*
 * One character of a character class: either a plain character or one of
 * the supported escape sequences.
 */
bracketDelimitedCharacter
  = simpleBracketDelimitedCharacter
  / simpleEscapeSequence
  / zeroEscapeSequence
  / hexEscapeSequence
  / unicodeEscapeSequence
  / eolEscapeSequence

/* Any character except "]", a backslash or a line-end character. */
simpleBracketDelimitedCharacter = !("]" / "\\" / eolChar) . { return $2; }

/*
 * A backslash followed by one character that is not a digit, "x", "u" or a
 * line-end character. The letters b, f, n, r, t and v map to their control
 * characters; any other character stands for itself.
 */
simpleEscapeSequence = "\\" !(digit / "x" / "u" / eolChar) . {
  switch ($3) {
    case "b": return "\b";
    case "f": return "\f";
    case "n": return "\n";
    case "r": return "\r";
    case "t": return "\t";
    case "v": return "\x0B"; // IE does not recognize "\v".
    default:  return $3;
  }
}

/* A backslash and "0" not followed by another digit; yields the NUL character. */
zeroEscapeSequence = "\\0" !digit { return "\0"; }

/*
 * A backslash, "x" and exactly two hex digits; yields the character with
 * that code.
 */
hexEscapeSequence = "\\x" hexDigit hexDigit {
  // Parse with an explicit radix rather than relying on parseInt detecting a
  // "0x" prefix.
  return String.fromCharCode(parseInt($2 + $3, 16));
}

/*
 * A backslash, "u" and exactly four hex digits; yields the character with
 * that code.
 */
unicodeEscapeSequence = "\\u" hexDigit hexDigit hexDigit hexDigit {
  // Parse with an explicit radix rather than relying on parseInt detecting a
  // "0x" prefix.
  return String.fromCharCode(parseInt($2 + $3 + $4 + $5, 16));
}

/*
 * A backslash directly followed by a line end; yields the matched line end
 * itself.
 * NOTE(review): in ECMA-262, 5th ed., 7.8.4 a line continuation contributes
 * an empty string; confirm that keeping the line end here is intentional.
 */
eolEscapeSequence = "\\" eol { return $2; }

/* Basic ASCII character classes used by identifiers and escape sequences. */

/* A decimal digit. */
digit = [0-9]

/* A hexadecimal digit in either case. */
hexDigit = [0-9a-fA-F]

/* An ASCII letter in either case. */
letter = lowerCaseLetter / upperCaseLetter

lowerCaseLetter = [a-z]

upperCaseLetter = [A-Z]

/*
 * Skippable material: any number of whitespace characters, line ends and
 * comments (possibly none).
 */
__ = (whitespace / eol / comment)*

/* Modelled after ECMA-262, 5th ed., 7.4. */
/* Either comment form: single-line or multi-line. */
comment "comment" = singleLineComment / multiLineComment

/* Two slashes up to, but not including, the next line-end character. */
singleLineComment = "//" (!eolChar .)*

// A block comment; it ends at the first closing delimiter, so block comments
// do not nest.
multiLineComment = "/*" (!"*/" .)* "*/"

/* Modelled after ECMA-262, 5th ed., 7.3. */
/*
 * A line terminator sequence. "\r\n" is tried before "\r" so that a CR LF
 * pair is matched as one line end.
 */
eol "end of line" = "\n" / "\r\n" / "\r" / "\u2028" / "\u2029"

/* A single line-end character; used in lookaheads by other rules. */
eolChar = [\n\r\u2028\u2029]

/* Modelled after ECMA-262, 5th ed., 7.2. */
/* A single whitespace character; line-end characters are not in this class. */
whitespace "whitespace" = [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]