parent
1da6abc458
commit
6294bb5b13
14
Makefile
14
Makefile
|
@ -68,14 +68,12 @@ browser:
|
|||
rm -f $(BROWSER_FILE_DEV)
|
||||
rm -f $(BROWSER_FILE_MIN)
|
||||
|
||||
echo '/*' >> $(BROWSER_FILE_DEV)
|
||||
echo " * PEG.js $(PEGJS_VERSION)" >> $(BROWSER_FILE_DEV)
|
||||
echo ' *' >> $(BROWSER_FILE_DEV)
|
||||
echo ' * http://pegjs.org/' >> $(BROWSER_FILE_DEV)
|
||||
echo ' *' >> $(BROWSER_FILE_DEV)
|
||||
echo ' * Copyright (c) 2010-2016 David Majda' >> $(BROWSER_FILE_DEV)
|
||||
echo ' * Licensed under the MIT license.' >> $(BROWSER_FILE_DEV)
|
||||
echo ' */' >> $(BROWSER_FILE_DEV)
|
||||
echo "// PEG.js $(PEGJS_VERSION)" >> $(BROWSER_FILE_DEV)
|
||||
echo '//' >> $(BROWSER_FILE_DEV)
|
||||
echo '// http://pegjs.org/' >> $(BROWSER_FILE_DEV)
|
||||
echo '//' >> $(BROWSER_FILE_DEV)
|
||||
echo '// Copyright (c) 2010-2016 David Majda' >> $(BROWSER_FILE_DEV)
|
||||
echo '// Licensed under the MIT license.' >> $(BROWSER_FILE_DEV)
|
||||
|
||||
$(BROWSERIFY) \
|
||||
--standalone peg \
|
||||
|
|
|
@ -6,7 +6,7 @@ let Runner = require("./runner.js");
|
|||
$("#run").click(() => {
|
||||
"use strict";
|
||||
|
||||
/* Results Table Manipulation */
|
||||
// Results Table Manipulation
|
||||
|
||||
let resultsTable = $("#results-table");
|
||||
|
||||
|
@ -49,18 +49,16 @@ $("#run").click(() => {
|
|||
);
|
||||
}
|
||||
|
||||
/* Main */
|
||||
// Main
|
||||
|
||||
/*
|
||||
* Each input is parsed multiple times and the results are averaged. We
|
||||
* do this for two reasons:
|
||||
*
|
||||
* 1. To warm up the interpreter (PEG.js-generated parsers will be
|
||||
* most likely used repeatedly, so it makes sense to measure
|
||||
* performance after warming up).
|
||||
*
|
||||
* 2. To minimize random errors.
|
||||
*/
|
||||
// Each input is parsed multiple times and the results are averaged. We
|
||||
// do this for two reasons:
|
||||
//
|
||||
// 1. To warm up the interpreter (PEG.js-generated parsers will be
|
||||
// most likely used repeatedly, so it makes sense to measure
|
||||
// performance after warming up).
|
||||
//
|
||||
// 2. To minimize random errors.
|
||||
|
||||
let runCount = parseInt($("#run-count").val(), 10);
|
||||
let options = {
|
||||
|
@ -84,7 +82,7 @@ $("#run").click(() => {
|
|||
},
|
||||
|
||||
testStart: function() {
|
||||
/* Nothing to do. */
|
||||
// Nothing to do.
|
||||
},
|
||||
|
||||
testFinish: function(benchmark, test, inputSize, parseTime) {
|
||||
|
|
|
@ -10,7 +10,7 @@ let fs = require("fs");
|
|||
let benchmarks = require("./benchmarks.js");
|
||||
let Runner = require("./runner.js");
|
||||
|
||||
/* Results Table Manipulation */
|
||||
// Results Table Manipulation
|
||||
|
||||
function dup(text, count) {
|
||||
let result = "";
|
||||
|
@ -70,7 +70,7 @@ function writeTableFooter() {
|
|||
console.log("└─────────────────────────────────────┴───────────┴────────────┴──────────────┘");
|
||||
}
|
||||
|
||||
/* Helpers */
|
||||
// Helpers
|
||||
|
||||
function printHelp() {
|
||||
console.log("Usage: run [options]");
|
||||
|
@ -97,7 +97,7 @@ function abort(message) {
|
|||
exitFailure();
|
||||
}
|
||||
|
||||
/* Arguments */
|
||||
// Arguments
|
||||
|
||||
let args = process.argv.slice(2); // Trim "node" and the script path.
|
||||
|
||||
|
@ -109,7 +109,7 @@ function nextArg() {
|
|||
args.shift();
|
||||
}
|
||||
|
||||
/* Main */
|
||||
// Main
|
||||
|
||||
let runCount = 10;
|
||||
let options = {
|
||||
|
@ -169,7 +169,7 @@ Runner.run(benchmarks, runCount, options, {
|
|||
},
|
||||
|
||||
testStart: function() {
|
||||
/* Nothing to do. */
|
||||
// Nothing to do.
|
||||
},
|
||||
|
||||
testFinish: function(benchmark, test, inputSize, parseTime) {
|
||||
|
|
|
@ -7,7 +7,7 @@ let peg = require("../lib/peg");
|
|||
let Runner = {
|
||||
run: function(benchmarks, runCount, options, callbacks) {
|
||||
|
||||
/* Queue */
|
||||
// Queue
|
||||
|
||||
let Q = {
|
||||
functions: [],
|
||||
|
@ -20,29 +20,25 @@ let Runner = {
|
|||
if (this.functions.length > 0) {
|
||||
this.functions.shift()();
|
||||
|
||||
/*
|
||||
* We can't use |arguments.callee| here because |this| would get
|
||||
* messed-up in that case.
|
||||
*/
|
||||
// We can't use |arguments.callee| here because |this| would get
|
||||
// messed-up in that case.
|
||||
setTimeout(() => { Q.run(); }, 0);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* The benchmark itself is factored out into several functions (some of them
|
||||
* generated), which are enqueued and run one by one using |setTimeout|. We
|
||||
* do this for two reasons:
|
||||
*
|
||||
* 1. To avoid browser mechanism for interrupting long-running scripts to
|
||||
* kick-in (or at least to not kick-in that often).
|
||||
*
|
||||
* 2. To ensure progressive rendering of results in the browser (some
|
||||
* browsers do not render at all when running JavaScript code).
|
||||
*
|
||||
* The enqueued functions share state, which is all stored in the properties
|
||||
* of the |state| object.
|
||||
*/
|
||||
// The benchmark itself is factored out into several functions (some of them
|
||||
// generated), which are enqueued and run one by one using |setTimeout|. We
|
||||
// do this for two reasons:
|
||||
//
|
||||
// 1. To avoid browser mechanism for interrupting long-running scripts to
|
||||
// kick-in (or at least to not kick-in that often).
|
||||
//
|
||||
// 2. To ensure progressive rendering of results in the browser (some
|
||||
// browsers do not render at all when running JavaScript code).
|
||||
//
|
||||
// The enqueued functions share state, which is all stored in the properties
|
||||
// of the |state| object.
|
||||
|
||||
let state = {};
|
||||
|
||||
|
@ -104,7 +100,7 @@ let Runner = {
|
|||
callbacks.finish(state.totalInputSize, state.totalParseTime);
|
||||
}
|
||||
|
||||
/* Main */
|
||||
// Main
|
||||
|
||||
Q.add(initialize);
|
||||
benchmarks.forEach(benchmark => {
|
||||
|
|
|
@ -5,11 +5,9 @@
|
|||
|
||||
"use strict";
|
||||
|
||||
/*
|
||||
* Small server whose main purpose is to ensure that both the benchmarked code
|
||||
* and the benchmark get passed through Babel & Browserify before they are
|
||||
* served to the browser.
|
||||
*/
|
||||
// Small server whose main purpose is to ensure that both the benchmarked code
|
||||
// and the benchmark get passed through Babel & Browserify before they are
|
||||
// served to the browser.
|
||||
|
||||
let express = require("express");
|
||||
let logger = require("morgan");
|
||||
|
|
14
bin/pegjs
14
bin/pegjs
|
@ -6,7 +6,7 @@ let fs = require("fs");
|
|||
let path = require("path");
|
||||
let peg = require("../lib/peg");
|
||||
|
||||
/* Helpers */
|
||||
// Helpers
|
||||
|
||||
function printVersion() {
|
||||
console.log("PEG.js " + peg.VERSION);
|
||||
|
@ -74,15 +74,13 @@ function addExtraOptions(options, json) {
|
|||
});
|
||||
}
|
||||
|
||||
/*
|
||||
* Extracted into a function just to silence JSHint complaining about creating
|
||||
* functions in a loop.
|
||||
*/
|
||||
// Extracted into a function just to silence JSHint complaining about creating
|
||||
// functions in a loop.
|
||||
function trim(s) {
|
||||
return s.trim();
|
||||
}
|
||||
|
||||
/* Arguments */
|
||||
// Arguments
|
||||
|
||||
let args = process.argv.slice(2); // Trim "node" and the script path.
|
||||
|
||||
|
@ -94,7 +92,7 @@ function nextArg() {
|
|||
args.shift();
|
||||
}
|
||||
|
||||
/* Files */
|
||||
// Files
|
||||
|
||||
function readStream(inputStream, callback) {
|
||||
let input = "";
|
||||
|
@ -102,7 +100,7 @@ function readStream(inputStream, callback) {
|
|||
inputStream.on("end", () => { callback(input); });
|
||||
}
|
||||
|
||||
/* Main */
|
||||
// Main
|
||||
|
||||
let inputFile = null;
|
||||
let outputFile = null;
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
/*
|
||||
* Simple Arithmetics Grammar
|
||||
* ==========================
|
||||
*
|
||||
* Accepts expressions like "2 * (3 + 4)" and computes their value.
|
||||
*/
|
||||
// Simple Arithmetics Grammar
|
||||
// ==========================
|
||||
//
|
||||
// Accepts expressions like "2 * (3 + 4)" and computes their value.
|
||||
|
||||
Expression
|
||||
= head:Term tail:(_ ("+" / "-") _ Term)* {
|
||||
|
|
|
@ -1,23 +1,21 @@
|
|||
/*
|
||||
* CSS Grammar
|
||||
* ===========
|
||||
*
|
||||
* Based on grammar from CSS 2.1 specification [1] (including the errata [2]).
|
||||
* Generated parser builds a syntax tree composed of nested JavaScript objects,
|
||||
* vaguely inspired by CSS DOM [3]. The CSS DOM itself wasn't used as it is not
|
||||
* expressive enough (e.g. selectors are reflected as text, not structured
|
||||
* objects) and somewhat cumbersome.
|
||||
*
|
||||
* Limitations:
|
||||
*
|
||||
* * Many errors which should be recovered from according to the specification
|
||||
* (e.g. malformed declarations or unexpected end of stylesheet) are fatal.
|
||||
* This is a result of straightforward rewrite of the CSS grammar to PEG.js.
|
||||
*
|
||||
* [1] http://www.w3.org/TR/2011/REC-CSS2-20110607
|
||||
* [2] http://www.w3.org/Style/css2-updates/REC-CSS2-20110607-errata.html
|
||||
* [3] http://www.w3.org/TR/DOM-Level-2-Style/css.html
|
||||
*/
|
||||
// CSS Grammar
|
||||
// ===========
|
||||
//
|
||||
// Based on grammar from CSS 2.1 specification [1] (including the errata [2]).
|
||||
// Generated parser builds a syntax tree composed of nested JavaScript objects,
|
||||
// vaguely inspired by CSS DOM [3]. The CSS DOM itself wasn't used as it is not
|
||||
// expressive enough (e.g. selectors are reflected as text, not structured
|
||||
// objects) and somewhat cumbersome.
|
||||
//
|
||||
// Limitations:
|
||||
//
|
||||
// * Many errors which should be recovered from according to the specification
|
||||
// (e.g. malformed declarations or unexpected end of stylesheet) are fatal.
|
||||
// This is a result of straightforward rewrite of the CSS grammar to PEG.js.
|
||||
//
|
||||
// [1] http://www.w3.org/TR/2011/REC-CSS2-20110607
|
||||
// [2] http://www.w3.org/Style/css2-updates/REC-CSS2-20110607-errata.html
|
||||
// [3] http://www.w3.org/TR/DOM-Level-2-Style/css.html
|
||||
|
||||
{
|
||||
function extractOptional(optional, index) {
|
||||
|
@ -49,7 +47,7 @@
|
|||
start
|
||||
= stylesheet:stylesheet comment* { return stylesheet; }
|
||||
|
||||
/* ----- G.1 Grammar ----- */
|
||||
// ----- G.1 Grammar -----
|
||||
|
||||
stylesheet
|
||||
= charset:(CHARSET_SYM STRING ";")? (S / CDO / CDC)*
|
||||
|
@ -244,9 +242,9 @@ function
|
|||
hexcolor
|
||||
= value:HASH S* { return { type: "Hexcolor", value: value }; }
|
||||
|
||||
/* ----- G.2 Lexical scanner ----- */
|
||||
// ----- G.2 Lexical scanner -----
|
||||
|
||||
/* Macros */
|
||||
// Macros
|
||||
|
||||
h
|
||||
= [0-9a-f]i
|
||||
|
@ -338,7 +336,7 @@ U = "u"i / "\\" "0"? "0"? "0"? "0"? [\x55\x75] ("\r\n" / [ \t\r\n\f])? / "\\u"i
|
|||
X = "x"i / "\\" "0"? "0"? "0"? "0"? [\x58\x78] ("\r\n" / [ \t\r\n\f])? / "\\x"i { return "x"; }
|
||||
Z = "z"i / "\\" "0"? "0"? "0"? "0"? [\x5a\x7a] ("\r\n" / [ \t\r\n\f])? / "\\z"i { return "z"; }
|
||||
|
||||
/* Tokens */
|
||||
// Tokens
|
||||
|
||||
S "whitespace"
|
||||
= comment* s
|
||||
|
@ -376,7 +374,7 @@ MEDIA_SYM "@media"
|
|||
CHARSET_SYM "@charset"
|
||||
= comment* "@charset "
|
||||
|
||||
/* We use |s| instead of |w| here to avoid infinite recursion. */
|
||||
// We use |s| instead of |w| here to avoid infinite recursion.
|
||||
IMPORTANT_SYM "!important"
|
||||
= comment* "!" (s / comment)* I M P O R T A N T
|
||||
|
||||
|
|
|
@ -1,36 +1,34 @@
|
|||
/*
|
||||
* JavaScript Grammar
|
||||
* ==================
|
||||
*
|
||||
* Based on grammar from ECMA-262, 5.1 Edition [1]. Generated parser builds a
|
||||
* syntax tree compatible with the ESTree spec [2].
|
||||
*
|
||||
* Limitations:
|
||||
*
|
||||
* * Non-BMP characters are completely ignored to avoid surrogate pair
|
||||
* handling.
|
||||
*
|
||||
* * One can create identifiers containing illegal characters using Unicode
|
||||
* escape sequences. For example, "abcd\u0020efgh" is not a valid
|
||||
* identifier, but it is accepted by the parser.
|
||||
*
|
||||
* * Strict mode is not recognized. This means that within strict mode code,
|
||||
* "implements", "interface", "let", "package", "private", "protected",
|
||||
* "public", "static" and "yield" can be used as names. Many other
|
||||
* restrictions and exceptions from Annex C are also not applied.
|
||||
*
|
||||
* All the limitations could be resolved, but the costs would likely outweigh
|
||||
* the benefits.
|
||||
*
|
||||
* Many thanks to inimino [3] for his grammar [4] which helped me to solve some
|
||||
* problems (such as automatic semicolon insertion) and also served to double
|
||||
* check that I converted the original grammar correctly.
|
||||
*
|
||||
* [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
||||
* [2] https://github.com/estree/estree
|
||||
* [3] http://inimino.org/~inimino/blog/
|
||||
* [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg
|
||||
*/
|
||||
// JavaScript Grammar
|
||||
// ==================
|
||||
//
|
||||
// Based on grammar from ECMA-262, 5.1 Edition [1]. Generated parser builds a
|
||||
// syntax tree compatible with the ESTree spec [2].
|
||||
//
|
||||
// Limitations:
|
||||
//
|
||||
// * Non-BMP characters are completely ignored to avoid surrogate pair
|
||||
// handling.
|
||||
//
|
||||
// * One can create identifiers containing illegal characters using Unicode
|
||||
// escape sequences. For example, "abcd\u0020efgh" is not a valid
|
||||
// identifier, but it is accepted by the parser.
|
||||
//
|
||||
// * Strict mode is not recognized. This means that within strict mode code,
|
||||
// "implements", "interface", "let", "package", "private", "protected",
|
||||
// "public", "static" and "yield" can be used as names. Many other
|
||||
// restrictions and exceptions from Annex C are also not applied.
|
||||
//
|
||||
// All the limitations could be resolved, but the costs would likely outweigh
|
||||
// the benefits.
|
||||
//
|
||||
// Many thanks to inimino [3] for his grammar [4] which helped me to solve some
|
||||
// problems (such as automatic semicolon insertion) and also served to double
|
||||
// check that I converted the original grammar correctly.
|
||||
//
|
||||
// [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
||||
// [2] https://github.com/estree/estree
|
||||
// [3] http://inimino.org/~inimino/blog/
|
||||
// [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg
|
||||
|
||||
{
|
||||
var TYPES_TO_PROPERTY_NAMES = {
|
||||
|
@ -85,7 +83,7 @@
|
|||
Start
|
||||
= __ program:Program __ { return program; }
|
||||
|
||||
/* ----- A.1 Lexical Grammar ----- */
|
||||
// ----- A.1 Lexical Grammar -----
|
||||
|
||||
SourceCharacter
|
||||
= .
|
||||
|
@ -222,10 +220,8 @@ BooleanLiteral
|
|||
= TrueToken { return { type: "Literal", value: true }; }
|
||||
/ FalseToken { return { type: "Literal", value: false }; }
|
||||
|
||||
/*
|
||||
* The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official
|
||||
* grammar, it comes from text in section 7.8.3.
|
||||
*/
|
||||
// The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official
|
||||
// grammar, it comes from text in section 7.8.3.
|
||||
NumericLiteral "number"
|
||||
= literal:HexIntegerLiteral !(IdentifierStart / DecimalDigit) {
|
||||
return literal;
|
||||
|
@ -375,30 +371,28 @@ RegularExpressionClassChar
|
|||
RegularExpressionFlags
|
||||
= IdentifierPart*
|
||||
|
||||
/*
|
||||
* Unicode Character Categories
|
||||
*
|
||||
* Extracted from the following Unicode Character Database file:
|
||||
*
|
||||
* http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt
|
||||
*
|
||||
* Unix magic used:
|
||||
*
|
||||
* grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters
|
||||
* cut -f1 -d " " | # Extract code points
|
||||
* grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters
|
||||
* sed -e 's/\.\./-/' | # Adjust formatting
|
||||
* sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting
|
||||
* tr -d '\n' # Join lines
|
||||
*
|
||||
* ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one
|
||||
* at the time of writing.
|
||||
*
|
||||
* Non-BMP characters are completely ignored to avoid surrogate pair handling
|
||||
* (detecting surrogate pairs isn't possible with a simple character class and
|
||||
* other methods would degrade performance). I don't consider it a big deal as
|
||||
* even parsers in JavaScript engines of common browsers seem to ignore them.
|
||||
*/
|
||||
// Unicode Character Categories
|
||||
//
|
||||
// Extracted from the following Unicode Character Database file:
|
||||
//
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt
|
||||
//
|
||||
// Unix magic used:
|
||||
//
|
||||
// grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters
|
||||
// cut -f1 -d " " | # Extract code points
|
||||
// grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters
|
||||
// sed -e 's/\.\./-/' | # Adjust formatting
|
||||
// sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting
|
||||
// tr -d '\n' # Join lines
|
||||
//
|
||||
// ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one
|
||||
// at the time of writing.
|
||||
//
|
||||
// Non-BMP characters are completely ignored to avoid surrogate pair handling
|
||||
// (detecting surrogate pairs isn't possible with a simple character class and
|
||||
// other methods would degrade performance). I don't consider it a big deal as
|
||||
// even parsers in JavaScript engines of common browsers seem to ignore them.
|
||||
|
||||
// Letter, Lowercase
|
||||
Ll = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0561-\u0587\u13F8-\u13FD\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u
1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7B5\uA7B7\
uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A]
|
||||
|
@ -433,7 +427,7 @@ Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F]
|
|||
// Separator, Space
|
||||
Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]
|
||||
|
||||
/* Tokens */
|
||||
// Tokens
|
||||
|
||||
BreakToken = "break" !IdentifierPart
|
||||
CaseToken = "case" !IdentifierPart
|
||||
|
@ -474,7 +468,7 @@ VoidToken = "void" !IdentifierPart
|
|||
WhileToken = "while" !IdentifierPart
|
||||
WithToken = "with" !IdentifierPart
|
||||
|
||||
/* Skipped */
|
||||
// Skipped
|
||||
|
||||
__
|
||||
= (WhiteSpace / LineTerminatorSequence / Comment)*
|
||||
|
@ -482,7 +476,7 @@ __
|
|||
_
|
||||
= (WhiteSpace / MultiLineCommentNoLineTerminator)*
|
||||
|
||||
/* Automatic Semicolon Insertion */
|
||||
// Automatic Semicolon Insertion
|
||||
|
||||
EOS
|
||||
= __ ";"
|
||||
|
@ -493,11 +487,11 @@ EOS
|
|||
EOF
|
||||
= !.
|
||||
|
||||
/* ----- A.2 Number Conversions ----- */
|
||||
// ----- A.2 Number Conversions -----
|
||||
|
||||
/* Irrelevant. */
|
||||
// Irrelevant.
|
||||
|
||||
/* ----- A.3 Expressions ----- */
|
||||
// ----- A.3 Expressions -----
|
||||
|
||||
PrimaryExpression
|
||||
= ThisToken { return { type: "ThisExpression" }; }
|
||||
|
@ -962,7 +956,7 @@ ExpressionNoIn
|
|||
: head;
|
||||
}
|
||||
|
||||
/* ----- A.4 Statements ----- */
|
||||
// ----- A.4 Statements -----
|
||||
|
||||
Statement
|
||||
= Block
|
||||
|
@ -1274,7 +1268,7 @@ Finally
|
|||
DebuggerStatement
|
||||
= DebuggerToken EOS { return { type: "DebuggerStatement" }; }
|
||||
|
||||
/* ----- A.5 Functions and Programs ----- */
|
||||
// ----- A.5 Functions and Programs -----
|
||||
|
||||
FunctionDeclaration
|
||||
= FunctionToken __ id:Identifier __
|
||||
|
@ -1332,14 +1326,14 @@ SourceElement
|
|||
= Statement
|
||||
/ FunctionDeclaration
|
||||
|
||||
/* ----- A.6 Universal Resource Identifier Character Classes ----- */
|
||||
// ----- A.6 Universal Resource Identifier Character Classes -----
|
||||
|
||||
/* Irrelevant. */
|
||||
// Irrelevant.
|
||||
|
||||
/* ----- A.7 Regular Expressions ----- */
|
||||
// ----- A.7 Regular Expressions -----
|
||||
|
||||
/* Irrelevant. */
|
||||
// Irrelevant.
|
||||
|
||||
/* ----- A.8 JSON ----- */
|
||||
// ----- A.8 JSON -----
|
||||
|
||||
/* Irrelevant. */
|
||||
// Irrelevant.
|
||||
|
|
|
@ -1,21 +1,19 @@
|
|||
/*
|
||||
* JSON Grammar
|
||||
* ============
|
||||
*
|
||||
* Based on the grammar from RFC 7159 [1].
|
||||
*
|
||||
* Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
|
||||
* JSON website [4] (somewhat informally). The RFC seems the most authoritative
|
||||
* source, which is confirmed e.g. by [5].
|
||||
*
|
||||
* [1] http://tools.ietf.org/html/rfc7159
|
||||
* [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
||||
* [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm
|
||||
* [4] http://json.org/
|
||||
* [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON
|
||||
*/
|
||||
// JSON Grammar
|
||||
// ============
|
||||
//
|
||||
// Based on the grammar from RFC 7159 [1].
|
||||
//
|
||||
// Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the
|
||||
// JSON website [4] (somewhat informally). The RFC seems the most authoritative
|
||||
// source, which is confirmed e.g. by [5].
|
||||
//
|
||||
// [1] http://tools.ietf.org/html/rfc7159
|
||||
// [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
||||
// [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm
|
||||
// [4] http://json.org/
|
||||
// [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON
|
||||
|
||||
/* ----- 2. JSON Grammar ----- */
|
||||
// ----- 2. JSON Grammar -----
|
||||
|
||||
JSON_text
|
||||
= ws value:value ws { return value; }
|
||||
|
@ -29,7 +27,7 @@ value_separator = ws "," ws
|
|||
|
||||
ws "whitespace" = [ \t\n\r]*
|
||||
|
||||
/* ----- 3. Values ----- */
|
||||
// ----- 3. Values -----
|
||||
|
||||
value
|
||||
= false
|
||||
|
@ -44,7 +42,7 @@ false = "false" { return false; }
|
|||
null = "null" { return null; }
|
||||
true = "true" { return true; }
|
||||
|
||||
/* ----- 4. Objects ----- */
|
||||
// ----- 4. Objects -----
|
||||
|
||||
object
|
||||
= begin_object
|
||||
|
@ -69,7 +67,7 @@ member
|
|||
return { name: name, value: value };
|
||||
}
|
||||
|
||||
/* ----- 5. Arrays ----- */
|
||||
// ----- 5. Arrays -----
|
||||
|
||||
array
|
||||
= begin_array
|
||||
|
@ -81,7 +79,7 @@ array
|
|||
end_array
|
||||
{ return values !== null ? values : []; }
|
||||
|
||||
/* ----- 6. Numbers ----- */
|
||||
// ----- 6. Numbers -----
|
||||
|
||||
number "number"
|
||||
= minus? int frac? exp? { return parseFloat(text()); }
|
||||
|
@ -96,7 +94,7 @@ minus = "-"
|
|||
plus = "+"
|
||||
zero = "0"
|
||||
|
||||
/* ----- 7. Strings ----- */
|
||||
// ----- 7. Strings -----
|
||||
|
||||
string "string"
|
||||
= quotation_mark chars:char* quotation_mark { return chars.join(""); }
|
||||
|
@ -123,8 +121,8 @@ escape = "\\"
|
|||
quotation_mark = '"'
|
||||
unescaped = [^\0-\x1F\x22\x5C]
|
||||
|
||||
/* ----- Core ABNF Rules ----- */
|
||||
// ----- Core ABNF Rules -----
|
||||
|
||||
/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). */
|
||||
// See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234).
|
||||
DIGIT = [0-9]
|
||||
HEXDIG = [0-9a-f]i
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
let visitor = require("./visitor");
|
||||
|
||||
/* AST utilities. */
|
||||
// AST utilities.
|
||||
let asts = {
|
||||
findRule: function(ast, name) {
|
||||
for (let i = 0; i < ast.rules.length; i++) {
|
||||
|
|
|
@ -17,19 +17,15 @@ function processOptions(options, defaults) {
|
|||
}
|
||||
|
||||
let compiler = {
|
||||
/*
|
||||
* AST node visitor builder. Useful mainly for plugins which manipulate the
|
||||
* AST.
|
||||
*/
|
||||
// AST node visitor builder. Useful mainly for plugins which manipulate the
|
||||
// AST.
|
||||
visitor: require("./visitor"),
|
||||
|
||||
/*
|
||||
* Compiler passes.
|
||||
*
|
||||
* Each pass is a function that is passed the AST. It can perform checks on it
|
||||
* or modify it as needed. If the pass encounters a semantic error, it throws
|
||||
* |peg.GrammarError|.
|
||||
*/
|
||||
// Compiler passes.
|
||||
//
|
||||
// Each pass is a function that is passed the AST. It can perform checks on it
|
||||
// or modify it as needed. If the pass encounters a semantic error, it throws
|
||||
// |peg.GrammarError|.
|
||||
passes: {
|
||||
check: {
|
||||
reportUndefinedRules: require("./passes/report-undefined-rules"),
|
||||
|
@ -47,12 +43,10 @@ let compiler = {
|
|||
}
|
||||
},
|
||||
|
||||
/*
|
||||
* Generates a parser from a specified grammar AST. Throws |peg.GrammarError|
|
||||
* if the AST contains a semantic error. Note that not all errors are detected
|
||||
* during the generation and some may propagate to the generated parser and
|
||||
* cause its malfunction.
|
||||
*/
|
||||
// Generates a parser from a specified grammar AST. Throws |peg.GrammarError|
|
||||
// if the AST contains a semantic error. Note that not all errors are detected
|
||||
// during the generation and some may propagate to the generated parser and
|
||||
// cause its malfunction.
|
||||
compile: function(ast, passes, options) {
|
||||
options = options !== undefined ? options : {};
|
||||
|
||||
|
|
|
@ -2,17 +2,15 @@
|
|||
|
||||
// Returns the uppercase hexadecimal form of the UTF-16 code unit found at the
// start of |ch|. No zero-padding is applied; callers add it where needed.
function hex(ch) {
  const codeUnit = ch.charCodeAt(0);
  const digits = codeUnit.toString(16);

  return digits.toUpperCase();
}
|
||||
|
||||
/* JavaScript code generation helpers. */
|
||||
// JavaScript code generation helpers.
|
||||
let js = {
|
||||
stringEscape: function(s) {
|
||||
/*
|
||||
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
|
||||
* literal except for the closing quote character, backslash, carriage
|
||||
* return, line separator, paragraph separator, and line feed. Any character
|
||||
* may appear in the form of an escape sequence.
|
||||
*
|
||||
* For portability, we also escape all control and non-ASCII characters.
|
||||
*/
|
||||
// ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
|
||||
// literal except for the closing quote character, backslash, carriage
|
||||
// return, line separator, paragraph separator, and line feed. Any character
|
||||
// may appear in the form of an escape sequence.
|
||||
//
|
||||
// For portability, we also escape all control and non-ASCII characters.
|
||||
return s
|
||||
.replace(/\\/g, '\\\\') // backslash
|
||||
.replace(/"/g, '\\"') // closing double quote
|
||||
|
@ -30,11 +28,9 @@ let js = {
|
|||
},
|
||||
|
||||
regexpClassEscape: function(s) {
|
||||
/*
|
||||
* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
|
||||
*
|
||||
* For portability, we also escape all control and non-ASCII characters.
|
||||
*/
|
||||
// Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
|
||||
//
|
||||
// For portability, we also escape all control and non-ASCII characters.
|
||||
return s
|
||||
.replace(/\\/g, '\\\\') // backslash
|
||||
.replace(/\//g, '\\/') // closing slash
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
"use strict";
|
||||
|
||||
/* Bytecode instruction opcodes. */
|
||||
// Bytecode instruction opcodes.
|
||||
let opcodes = {
|
||||
/* Stack Manipulation */
|
||||
// Stack Manipulation
|
||||
|
||||
PUSH: 0, // PUSH c
|
||||
PUSH_UNDEFINED: 1, // PUSH_UNDEFINED
|
||||
|
@ -18,14 +18,14 @@ let opcodes = {
|
|||
WRAP: 11, // WRAP n
|
||||
TEXT: 12, // TEXT
|
||||
|
||||
/* Conditions and Loops */
|
||||
// Conditions and Loops
|
||||
|
||||
IF: 13, // IF t, f
|
||||
IF_ERROR: 14, // IF_ERROR t, f
|
||||
IF_NOT_ERROR: 15, // IF_NOT_ERROR t, f
|
||||
WHILE_NOT_ERROR: 16, // WHILE_NOT_ERROR b
|
||||
|
||||
/* Matching */
|
||||
// Matching
|
||||
|
||||
MATCH_ANY: 17, // MATCH_ANY a, f, ...
|
||||
MATCH_STRING: 18, // MATCH_STRING s, a, f, ...
|
||||
|
@ -35,17 +35,17 @@ let opcodes = {
|
|||
ACCEPT_STRING: 22, // ACCEPT_STRING s
|
||||
FAIL: 23, // FAIL e
|
||||
|
||||
/* Calls */
|
||||
// Calls
|
||||
|
||||
LOAD_SAVED_POS: 24, // LOAD_SAVED_POS p
|
||||
UPDATE_SAVED_POS: 25, // UPDATE_SAVED_POS
|
||||
CALL: 26, // CALL f, n, pc, p1, p2, ..., pN
|
||||
|
||||
/* Rules */
|
||||
// Rules
|
||||
|
||||
RULE: 27, // RULE r
|
||||
|
||||
/* Failure Reporting */
|
||||
// Failure Reporting
|
||||
|
||||
SILENT_FAILS_ON: 28, // SILENT_FAILS_ON
|
||||
SILENT_FAILS_OFF: 29 // SILENT_FAILS_OFF
|
||||
|
|
|
@ -5,189 +5,188 @@ let visitor = require("../visitor");
|
|||
let op = require("../opcodes");
|
||||
let js = require("../js");
|
||||
|
||||
/* Generates bytecode.
|
||||
*
|
||||
* Instructions
|
||||
* ============
|
||||
*
|
||||
* Stack Manipulation
|
||||
* ------------------
|
||||
*
|
||||
* [0] PUSH c
|
||||
*
|
||||
* stack.push(consts[c]);
|
||||
*
|
||||
* [1] PUSH_UNDEFINED
|
||||
*
|
||||
* stack.push(undefined);
|
||||
*
|
||||
* [2] PUSH_NULL
|
||||
*
|
||||
* stack.push(null);
|
||||
*
|
||||
* [3] PUSH_FAILED
|
||||
*
|
||||
* stack.push(FAILED);
|
||||
*
|
||||
* [4] PUSH_EMPTY_ARRAY
|
||||
*
|
||||
* stack.push([]);
|
||||
*
|
||||
* [5] PUSH_CURR_POS
|
||||
*
|
||||
* stack.push(currPos);
|
||||
*
|
||||
* [6] POP
|
||||
*
|
||||
* stack.pop();
|
||||
*
|
||||
* [7] POP_CURR_POS
|
||||
*
|
||||
* currPos = stack.pop();
|
||||
*
|
||||
* [8] POP_N n
|
||||
*
|
||||
* stack.pop(n);
|
||||
*
|
||||
* [9] NIP
|
||||
*
|
||||
* value = stack.pop();
|
||||
* stack.pop();
|
||||
* stack.push(value);
|
||||
*
|
||||
* [10] APPEND
|
||||
*
|
||||
* value = stack.pop();
|
||||
* array = stack.pop();
|
||||
* array.push(value);
|
||||
* stack.push(array);
|
||||
*
|
||||
* [11] WRAP n
|
||||
*
|
||||
* stack.push(stack.pop(n));
|
||||
*
|
||||
* [12] TEXT
|
||||
*
|
||||
* stack.push(input.substring(stack.pop(), currPos));
|
||||
*
|
||||
* Conditions and Loops
|
||||
* --------------------
|
||||
*
|
||||
* [13] IF t, f
|
||||
*
|
||||
* if (stack.top()) {
|
||||
* interpret(ip + 3, ip + 3 + t);
|
||||
* } else {
|
||||
* interpret(ip + 3 + t, ip + 3 + t + f);
|
||||
* }
|
||||
*
|
||||
* [14] IF_ERROR t, f
|
||||
*
|
||||
* if (stack.top() === FAILED) {
|
||||
* interpret(ip + 3, ip + 3 + t);
|
||||
* } else {
|
||||
* interpret(ip + 3 + t, ip + 3 + t + f);
|
||||
* }
|
||||
*
|
||||
* [15] IF_NOT_ERROR t, f
|
||||
*
|
||||
* if (stack.top() !== FAILED) {
|
||||
* interpret(ip + 3, ip + 3 + t);
|
||||
* } else {
|
||||
* interpret(ip + 3 + t, ip + 3 + t + f);
|
||||
* }
|
||||
*
|
||||
* [16] WHILE_NOT_ERROR b
|
||||
*
|
||||
* while(stack.top() !== FAILED) {
|
||||
* interpret(ip + 2, ip + 2 + b);
|
||||
* }
|
||||
*
|
||||
* Matching
|
||||
* --------
|
||||
*
|
||||
* [17] MATCH_ANY a, f, ...
|
||||
*
|
||||
* if (input.length > currPos) {
|
||||
* interpret(ip + 3, ip + 3 + a);
|
||||
* } else {
|
||||
* interpret(ip + 3 + a, ip + 3 + a + f);
|
||||
* }
|
||||
*
|
||||
* [18] MATCH_STRING s, a, f, ...
|
||||
*
|
||||
* if (input.substr(currPos, consts[s].length) === consts[s]) {
|
||||
* interpret(ip + 4, ip + 4 + a);
|
||||
* } else {
|
||||
* interpret(ip + 4 + a, ip + 4 + a + f);
|
||||
* }
|
||||
*
|
||||
* [19] MATCH_STRING_IC s, a, f, ...
|
||||
*
|
||||
* if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) {
|
||||
* interpret(ip + 4, ip + 4 + a);
|
||||
* } else {
|
||||
* interpret(ip + 4 + a, ip + 4 + a + f);
|
||||
* }
|
||||
*
|
||||
* [20] MATCH_REGEXP r, a, f, ...
|
||||
*
|
||||
* if (consts[r].test(input.charAt(currPos))) {
|
||||
* interpret(ip + 4, ip + 4 + a);
|
||||
* } else {
|
||||
* interpret(ip + 4 + a, ip + 4 + a + f);
|
||||
* }
|
||||
*
|
||||
* [21] ACCEPT_N n
|
||||
*
|
||||
* stack.push(input.substring(currPos, n));
|
||||
* currPos += n;
|
||||
*
|
||||
* [22] ACCEPT_STRING s
|
||||
*
|
||||
* stack.push(consts[s]);
|
||||
* currPos += consts[s].length;
|
||||
*
|
||||
* [23] FAIL e
|
||||
*
|
||||
* stack.push(FAILED);
|
||||
* fail(consts[e]);
|
||||
*
|
||||
* Calls
|
||||
* -----
|
||||
*
|
||||
* [24] LOAD_SAVED_POS p
|
||||
*
|
||||
* savedPos = stack[p];
|
||||
*
|
||||
* [25] UPDATE_SAVED_POS
|
||||
*
|
||||
* savedPos = currPos;
|
||||
*
|
||||
* [26] CALL f, n, pc, p1, p2, ..., pN
|
||||
*
|
||||
* value = consts[f](stack[p1], ..., stack[pN]);
|
||||
* stack.pop(n);
|
||||
* stack.push(value);
|
||||
*
|
||||
* Rules
|
||||
* -----
|
||||
*
|
||||
* [27] RULE r
|
||||
*
|
||||
* stack.push(parseRule(r));
|
||||
*
|
||||
* Failure Reporting
|
||||
* -----------------
|
||||
*
|
||||
* [28] SILENT_FAILS_ON
|
||||
*
|
||||
* silentFails++;
|
||||
*
|
||||
* [29] SILENT_FAILS_OFF
|
||||
*
|
||||
* silentFails--;
|
||||
*/
|
||||
// Generates bytecode.
|
||||
//
|
||||
// Instructions
|
||||
// ============
|
||||
//
|
||||
// Stack Manipulation
|
||||
// ------------------
|
||||
//
|
||||
// [0] PUSH c
|
||||
//
|
||||
// stack.push(consts[c]);
|
||||
//
|
||||
// [1] PUSH_UNDEFINED
|
||||
//
|
||||
// stack.push(undefined);
|
||||
//
|
||||
// [2] PUSH_NULL
|
||||
//
|
||||
// stack.push(null);
|
||||
//
|
||||
// [3] PUSH_FAILED
|
||||
//
|
||||
// stack.push(FAILED);
|
||||
//
|
||||
// [4] PUSH_EMPTY_ARRAY
|
||||
//
|
||||
// stack.push([]);
|
||||
//
|
||||
// [5] PUSH_CURR_POS
|
||||
//
|
||||
// stack.push(currPos);
|
||||
//
|
||||
// [6] POP
|
||||
//
|
||||
// stack.pop();
|
||||
//
|
||||
// [7] POP_CURR_POS
|
||||
//
|
||||
// currPos = stack.pop();
|
||||
//
|
||||
// [8] POP_N n
|
||||
//
|
||||
// stack.pop(n);
|
||||
//
|
||||
// [9] NIP
|
||||
//
|
||||
// value = stack.pop();
|
||||
// stack.pop();
|
||||
// stack.push(value);
|
||||
//
|
||||
// [10] APPEND
|
||||
//
|
||||
// value = stack.pop();
|
||||
// array = stack.pop();
|
||||
// array.push(value);
|
||||
// stack.push(array);
|
||||
//
|
||||
// [11] WRAP n
|
||||
//
|
||||
// stack.push(stack.pop(n));
|
||||
//
|
||||
// [12] TEXT
|
||||
//
|
||||
// stack.push(input.substring(stack.pop(), currPos));
|
||||
//
|
||||
// Conditions and Loops
|
||||
// --------------------
|
||||
//
|
||||
// [13] IF t, f
|
||||
//
|
||||
// if (stack.top()) {
|
||||
// interpret(ip + 3, ip + 3 + t);
|
||||
// } else {
|
||||
// interpret(ip + 3 + t, ip + 3 + t + f);
|
||||
// }
|
||||
//
|
||||
// [14] IF_ERROR t, f
|
||||
//
|
||||
// if (stack.top() === FAILED) {
|
||||
// interpret(ip + 3, ip + 3 + t);
|
||||
// } else {
|
||||
// interpret(ip + 3 + t, ip + 3 + t + f);
|
||||
// }
|
||||
//
|
||||
// [15] IF_NOT_ERROR t, f
|
||||
//
|
||||
// if (stack.top() !== FAILED) {
|
||||
// interpret(ip + 3, ip + 3 + t);
|
||||
// } else {
|
||||
// interpret(ip + 3 + t, ip + 3 + t + f);
|
||||
// }
|
||||
//
|
||||
// [16] WHILE_NOT_ERROR b
|
||||
//
|
||||
// while(stack.top() !== FAILED) {
|
||||
// interpret(ip + 2, ip + 2 + b);
|
||||
// }
|
||||
//
|
||||
// Matching
|
||||
// --------
|
||||
//
|
||||
// [17] MATCH_ANY a, f, ...
|
||||
//
|
||||
// if (input.length > currPos) {
|
||||
// interpret(ip + 3, ip + 3 + a);
|
||||
// } else {
|
||||
// interpret(ip + 3 + a, ip + 3 + a + f);
|
||||
// }
|
||||
//
|
||||
// [18] MATCH_STRING s, a, f, ...
|
||||
//
|
||||
// if (input.substr(currPos, consts[s].length) === consts[s]) {
|
||||
// interpret(ip + 4, ip + 4 + a);
|
||||
// } else {
|
||||
// interpret(ip + 4 + a, ip + 4 + a + f);
|
||||
// }
|
||||
//
|
||||
// [19] MATCH_STRING_IC s, a, f, ...
|
||||
//
|
||||
// if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) {
|
||||
// interpret(ip + 4, ip + 4 + a);
|
||||
// } else {
|
||||
// interpret(ip + 4 + a, ip + 4 + a + f);
|
||||
// }
|
||||
//
|
||||
// [20] MATCH_REGEXP r, a, f, ...
|
||||
//
|
||||
// if (consts[r].test(input.charAt(currPos))) {
|
||||
// interpret(ip + 4, ip + 4 + a);
|
||||
// } else {
|
||||
// interpret(ip + 4 + a, ip + 4 + a + f);
|
||||
// }
|
||||
//
|
||||
// [21] ACCEPT_N n
|
||||
//
|
||||
// stack.push(input.substr(currPos, n));
|
||||
// currPos += n;
|
||||
//
|
||||
// [22] ACCEPT_STRING s
|
||||
//
|
||||
// stack.push(consts[s]);
|
||||
// currPos += consts[s].length;
|
||||
//
|
||||
// [23] FAIL e
|
||||
//
|
||||
// stack.push(FAILED);
|
||||
// fail(consts[e]);
|
||||
//
|
||||
// Calls
|
||||
// -----
|
||||
//
|
||||
// [24] LOAD_SAVED_POS p
|
||||
//
|
||||
// savedPos = stack[p];
|
||||
//
|
||||
// [25] UPDATE_SAVED_POS
|
||||
//
|
||||
// savedPos = currPos;
|
||||
//
|
||||
// [26] CALL f, n, pc, p1, p2, ..., pN
|
||||
//
|
||||
// value = consts[f](stack[p1], ..., stack[pN]);
|
||||
// stack.pop(n);
|
||||
// stack.push(value);
|
||||
//
|
||||
// Rules
|
||||
// -----
|
||||
//
|
||||
// [27] RULE r
|
||||
//
|
||||
// stack.push(parseRule(r));
|
||||
//
|
||||
// Failure Reporting
|
||||
// -----------------
|
||||
//
|
||||
// [28] SILENT_FAILS_ON
|
||||
//
|
||||
// silentFails++;
|
||||
//
|
||||
// [29] SILENT_FAILS_OFF
|
||||
//
|
||||
// silentFails--;
|
||||
function generateBytecode(ast) {
|
||||
let consts = [];
|
||||
|
||||
|
@ -308,12 +307,10 @@ function generateBytecode(ast) {
|
|||
'peg$otherExpectation("' + js.stringEscape(node.name) + '")'
|
||||
);
|
||||
|
||||
/*
|
||||
* The code generated below is slightly suboptimal because |FAIL| pushes
|
||||
* to the stack, so we need to stick a |POP| in front of it. We lack a
|
||||
* dedicated instruction that would just report the failure and not touch
|
||||
* the stack.
|
||||
*/
|
||||
// The code generated below is slightly suboptimal because |FAIL| pushes
|
||||
// to the stack, so we need to stick a |POP| in front of it. We lack a
|
||||
// dedicated instruction that would just report the failure and not touch
|
||||
// the stack.
|
||||
return buildSequence(
|
||||
[op.SILENT_FAILS_ON],
|
||||
generate(node.expression, context),
|
||||
|
@ -551,11 +548,9 @@ function generateBytecode(ast) {
|
|||
+ ')'
|
||||
);
|
||||
|
||||
/*
|
||||
* For case-sensitive strings the value must match the beginning of the
|
||||
* remaining input exactly. As a result, we can use |ACCEPT_STRING| and
|
||||
* save one |substr| call that would be needed if we used |ACCEPT_N|.
|
||||
*/
|
||||
// For case-sensitive strings the value must match the beginning of the
|
||||
// remaining input exactly. As a result, we can use |ACCEPT_STRING| and
|
||||
// save one |substr| call that would be needed if we used |ACCEPT_N|.
|
||||
return buildCondition(
|
||||
node.ignoreCase
|
||||
? [op.MATCH_STRING_IC, stringIndex]
|
||||
|
|
|
@ -4,9 +4,9 @@ let asts = require("../asts");
|
|||
let op = require("../opcodes");
|
||||
let js = require("../js");
|
||||
|
||||
/* Generates parser JavaScript code. */
|
||||
// Generates parser JavaScript code.
|
||||
function generateJS(ast, options) {
|
||||
/* These only indent non-empty lines to avoid trailing whitespace. */
|
||||
// These only indent non-empty lines to avoid trailing whitespace.
|
||||
function indent2(code) { return code.replace(/^(.+)$/gm, ' $1'); }
|
||||
function indent10(code) { return code.replace(/^(.+)$/gm, ' $1'); }
|
||||
|
||||
|
@ -222,13 +222,11 @@ function generateJS(ast, options) {
|
|||
parts.push(indent2(generateRuleHeader('peg$ruleNames[index]', 'index')));
|
||||
|
||||
parts.push([
|
||||
/*
|
||||
* The point of the outer loop and the |ips| & |ends| stacks is to avoid
|
||||
* recursive calls for interpreting parts of bytecode. In other words, we
|
||||
* implement the |interpret| operation of the abstract machine without
|
||||
* function calls. Such calls would likely slow the parser down and more
|
||||
* importantly cause stack overflows for complex grammars.
|
||||
*/
|
||||
// The point of the outer loop and the |ips| & |ends| stacks is to avoid
|
||||
// recursive calls for interpreting parts of bytecode. In other words, we
|
||||
// implement the |interpret| operation of the abstract machine without
|
||||
// function calls. Such calls would likely slow the parser down and more
|
||||
// importantly cause stack overflows for complex grammars.
|
||||
' while (true) {',
|
||||
' while (ip < end) {',
|
||||
' switch (bc[ip]) {',
|
||||
|
@ -1201,11 +1199,9 @@ function generateJS(ast, options) {
|
|||
function generateWrapper(toplevelCode) {
|
||||
function generateGeneratedByComment() {
|
||||
return [
|
||||
'/*',
|
||||
' * Generated by PEG.js 0.10.0.',
|
||||
' *',
|
||||
' * http://pegjs.org/',
|
||||
' */'
|
||||
'// Generated by PEG.js 0.10.0.',
|
||||
'//',
|
||||
'// http://pegjs.org/'
|
||||
].join('\n');
|
||||
}
|
||||
|
||||
|
|
|
@ -2,9 +2,7 @@
|
|||
|
||||
let visitor = require("../visitor");
|
||||
|
||||
/*
|
||||
* Removes proxy rules -- that is, rules that only delegate to other rule.
|
||||
*/
|
||||
// Removes proxy rules -- that is, rules that only delegate to another rule.
|
||||
function removeProxyRules(ast, options) {
|
||||
function isProxyRule(node) {
|
||||
return node.type === "rule" && node.expression.type === "rule_ref";
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
let GrammarError = require("../../grammar-error");
|
||||
let visitor = require("../visitor");
|
||||
|
||||
/* Checks that each label is defined only once within each scope. */
|
||||
// Checks that each label is defined only once within each scope.
|
||||
function reportDuplicateLabels(ast) {
|
||||
function cloneEnv(env) {
|
||||
let clone = {};
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
let GrammarError = require("../../grammar-error");
|
||||
let visitor = require("../visitor");
|
||||
|
||||
/* Checks that each rule is defined only once. */
|
||||
// Checks that each rule is defined only once.
|
||||
function reportDuplicateRules(ast) {
|
||||
let rules = {};
|
||||
|
||||
|
|
|
@ -4,18 +4,16 @@ let GrammarError = require("../../grammar-error");
|
|||
let asts = require("../asts");
|
||||
let visitor = require("../visitor");
|
||||
|
||||
/*
|
||||
* Reports left recursion in the grammar, which prevents infinite recursion in
|
||||
* the generated parser.
|
||||
*
|
||||
* Both direct and indirect recursion is detected. The pass also correctly
|
||||
* reports cases like this:
|
||||
*
|
||||
* start = "a"? start
|
||||
*
|
||||
* In general, if a rule reference can be reached without consuming any input,
|
||||
* it can lead to left recursion.
|
||||
*/
|
||||
// Reports left recursion in the grammar, which prevents infinite recursion in
|
||||
// the generated parser.
|
||||
//
|
||||
// Both direct and indirect recursion are detected. The pass also correctly
|
||||
// reports cases like this:
|
||||
//
|
||||
// start = "a"? start
|
||||
//
|
||||
// In general, if a rule reference can be reached without consuming any input,
|
||||
// it can lead to left recursion.
|
||||
function reportInfiniteRecursion(ast) {
|
||||
let visitedRules = [];
|
||||
|
||||
|
|
|
@ -4,10 +4,8 @@ let GrammarError = require("../../grammar-error");
|
|||
let asts = require("../asts");
|
||||
let visitor = require("../visitor");
|
||||
|
||||
/*
|
||||
* Reports expressions that don't consume any input inside |*| or |+| in the
|
||||
* grammar, which prevents infinite loops in the generated parser.
|
||||
*/
|
||||
// Reports expressions that don't consume any input inside |*| or |+| in the
|
||||
// grammar, which prevents infinite loops in the generated parser.
|
||||
function reportInfiniteRepetition(ast) {
|
||||
let check = visitor.build({
|
||||
zero_or_more: function(node) {
|
||||
|
|
|
@ -4,7 +4,7 @@ let GrammarError = require("../../grammar-error");
|
|||
let asts = require("../asts");
|
||||
let visitor = require("../visitor");
|
||||
|
||||
/* Checks that all referenced rules exist. */
|
||||
// Checks that all referenced rules exist.
|
||||
function reportUndefinedRules(ast) {
|
||||
let check = visitor.build({
|
||||
rule_ref: function(node) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
"use strict";
|
||||
|
||||
/* Simple AST node visitor builder. */
|
||||
// Simple AST node visitor builder.
|
||||
let visitor = {
|
||||
build: function(functions) {
|
||||
function visit(node) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
"use strict";
|
||||
|
||||
/* Thrown when the grammar contains an error. */
|
||||
// Thrown when the grammar contains an error.
|
||||
class GrammarError {
|
||||
constructor(message, location) {
|
||||
this.name = "GrammarError";
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
/* eslint-env node, amd */
|
||||
/* eslint no-unused-vars: 0 */
|
||||
|
||||
/*
|
||||
* Generated by PEG.js 0.10.0.
|
||||
*
|
||||
* http://pegjs.org/
|
||||
*/
|
||||
// Generated by PEG.js 0.10.0.
|
||||
//
|
||||
// http://pegjs.org/
|
||||
|
||||
"use strict";
|
||||
|
||||
|
@ -236,12 +234,10 @@ function peg$parse(input, options) {
|
|||
var peg$c29 = ")";
|
||||
var peg$c30 = peg$literalExpectation(")", false);
|
||||
var peg$c31 = function(expression) {
|
||||
/*
|
||||
* The purpose of the "group" AST node is just to isolate label scope. We
|
||||
* don't need to put it around nodes that can't contain any labels or
|
||||
* nodes that already isolate label scope themselves. This leaves us with
|
||||
* "labeled" and "sequence".
|
||||
*/
|
||||
// The purpose of the "group" AST node is just to isolate label scope. We
|
||||
// don't need to put it around nodes that can't contain any labels or
|
||||
// nodes that already isolate label scope themselves. This leaves us with
|
||||
// "labeled" and "sequence".
|
||||
return expression.type === 'labeled' || expression.type === 'sequence'
|
||||
? { type: "group", expression: expression }
|
||||
: expression;
|
||||
|
|
22
lib/peg.js
22
lib/peg.js
|
@ -1,24 +1,22 @@
|
|||
"use strict";
|
||||
|
||||
let peg = {
|
||||
/* PEG.js version (uses semantic versioning). */
|
||||
// PEG.js version (uses semantic versioning).
|
||||
VERSION: "0.10.0",
|
||||
|
||||
GrammarError: require("./grammar-error"),
|
||||
parser: require("./parser"),
|
||||
compiler: require("./compiler"),
|
||||
|
||||
/*
|
||||
* Generates a parser from a specified grammar and returns it.
|
||||
*
|
||||
* The grammar must be a string in the format described by the metagramar in
|
||||
* the parser.pegjs file.
|
||||
*
|
||||
* Throws |peg.parser.SyntaxError| if the grammar contains a syntax error or
|
||||
* |peg.GrammarError| if it contains a semantic error. Note that not all
|
||||
* errors are detected during the generation and some may protrude to the
|
||||
* generated parser and cause its malfunction.
|
||||
*/
|
||||
// Generates a parser from a specified grammar and returns it.
|
||||
//
|
||||
// The grammar must be a string in the format described by the metagrammar in
|
||||
// the parser.pegjs file.
|
||||
//
|
||||
// Throws |peg.parser.SyntaxError| if the grammar contains a syntax error or
|
||||
// |peg.GrammarError| if it contains a semantic error. Note that not all
|
||||
// errors are detected during the generation and some may propagate to the
|
||||
// generated parser and cause its malfunction.
|
||||
generate: function(grammar, options) {
|
||||
options = options !== undefined ? options : {};
|
||||
|
||||
|
|
|
@ -26,10 +26,8 @@ describe("PEG.js API", function() {
|
|||
'c = "x"'
|
||||
].join("\n");
|
||||
|
||||
/*
|
||||
* The |allowedStartRules| option is implemented separately for each
|
||||
* optimization mode, so we need to test it in both.
|
||||
*/
|
||||
// The |allowedStartRules| option is implemented separately for each
|
||||
// optimization mode, so we need to test it in both.
|
||||
|
||||
describe("when optimizing for parsing speed", function() {
|
||||
describe("when |allowedStartRules| is not set", function() {
|
||||
|
@ -151,10 +149,8 @@ describe("PEG.js API", function() {
|
|||
});
|
||||
});
|
||||
|
||||
/*
|
||||
* The |optimize| option isn't tested because there is no meaningful way to
|
||||
* write the specs without turning this into a performance test.
|
||||
*/
|
||||
// The |optimize| option isn't tested because there is no meaningful way to
|
||||
// write the specs without turning this into a performance test.
|
||||
|
||||
describe("output", function() {
|
||||
let grammar = 'start = "a"';
|
||||
|
@ -187,13 +183,11 @@ describe("PEG.js API", function() {
|
|||
});
|
||||
});
|
||||
|
||||
/*
|
||||
* The |format|, |exportVars|, and |dependencies| options are not tested
|
||||
* becasue there is no meaningful way to thest their effects without turning
|
||||
* this into an integration test.
|
||||
*/
|
||||
// The |format|, |exportVars|, and |dependencies| options are not tested
|
||||
// because there is no meaningful way to test their effects without turning
|
||||
// this into an integration test.
|
||||
|
||||
/* The |plugins| option is tested in plugin API specs. */
|
||||
// The |plugins| option is tested in plugin API specs.
|
||||
|
||||
it("accepts custom options", function() {
|
||||
peg.generate('start = "a"', { foo: 42 });
|
||||
|
|
|
@ -120,10 +120,8 @@ describe("generated parser behavior", function() {
|
|||
}
|
||||
});
|
||||
|
||||
/*
|
||||
* Stub out |console.log| so that the default tracer doesn't clutter
|
||||
* test output.
|
||||
*/
|
||||
// Stub out |console.log| so that the default tracer doesn't clutter
|
||||
// test output.
|
||||
if (typeof console === "object") {
|
||||
spyOn(console, "log");
|
||||
}
|
||||
|
@ -412,11 +410,9 @@ describe("generated parser behavior", function() {
|
|||
describe("positive semantic predicate", function() {
|
||||
describe("when the code returns a truthy value", function() {
|
||||
it("returns |undefined|", function() {
|
||||
/*
|
||||
* The |""| is needed so that the parser doesn't return just
|
||||
* |undefined| which we can't compare against in |toParse| due to the
|
||||
* way optional parameters work.
|
||||
*/
|
||||
// The |""| is needed so that the parser doesn't return just
|
||||
// |undefined| which we can't compare against in |toParse| due to the
|
||||
// way optional parameters work.
|
||||
let parser = peg.generate('start = &{ return true; } ""', options);
|
||||
|
||||
expect(parser).toParse("", [undefined, ""]);
|
||||
|
@ -592,7 +588,7 @@ describe("generated parser behavior", function() {
|
|||
end: { offset: 13, line: 7, column: 5 }
|
||||
});
|
||||
|
||||
/* Newline representations */
|
||||
// Newline representations
|
||||
expect(parser).toParse("1\nx", { // Unix
|
||||
start: { offset: 2, line: 2, column: 1 },
|
||||
end: { offset: 2, line: 2, column: 1 }
|
||||
|
@ -608,11 +604,9 @@ describe("generated parser behavior", function() {
|
|||
describe("negative semantic predicate", function() {
|
||||
describe("when the code returns a falsey value", function() {
|
||||
it("returns |undefined|", function() {
|
||||
/*
|
||||
* The |""| is needed so that the parser doesn't return just
|
||||
* |undefined| which we can't compare against in |toParse| due to the
|
||||
* way optional parameters work.
|
||||
*/
|
||||
// The |""| is needed so that the parser doesn't return just
|
||||
// |undefined| which we can't compare against in |toParse| due to the
|
||||
// way optional parameters work.
|
||||
let parser = peg.generate('start = !{ return false; } ""', options);
|
||||
|
||||
expect(parser).toParse("", [undefined, ""]);
|
||||
|
@ -788,7 +782,7 @@ describe("generated parser behavior", function() {
|
|||
end: { offset: 13, line: 7, column: 5 }
|
||||
});
|
||||
|
||||
/* Newline representations */
|
||||
// Newline representations
|
||||
expect(parser).toParse("1\nx", { // Unix
|
||||
start: { offset: 2, line: 2, column: 1 },
|
||||
end: { offset: 2, line: 2, column: 1 }
|
||||
|
@ -1166,7 +1160,7 @@ describe("generated parser behavior", function() {
|
|||
end: { offset: 14, line: 7, column: 6 }
|
||||
});
|
||||
|
||||
/* Newline representations */
|
||||
// Newline representations
|
||||
expect(parser).toParse("1\nx", { // Unix
|
||||
start: { offset: 2, line: 2, column: 1 },
|
||||
end: { offset: 3, line: 2, column: 2 }
|
||||
|
@ -1460,7 +1454,7 @@ describe("generated parser behavior", function() {
|
|||
}
|
||||
});
|
||||
|
||||
/* Newline representations */
|
||||
// Newline representations
|
||||
expect(parser).toFailToParse("1\nx", { // Unix
|
||||
location: {
|
||||
start: { offset: 2, line: 2, column: 1 },
|
||||
|
@ -1477,18 +1471,14 @@ describe("generated parser behavior", function() {
|
|||
});
|
||||
});
|
||||
|
||||
/*
|
||||
* Following examples are from Wikipedia, see
|
||||
* http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938.
|
||||
*/
|
||||
// The following examples are from Wikipedia; see
|
||||
// http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938.
|
||||
describe("complex examples", function() {
|
||||
it("handles arithmetics example correctly", function() {
|
||||
/*
|
||||
* Value ← [0-9]+ / '(' Expr ')'
|
||||
* Product ← Value (('*' / '/') Value)*
|
||||
* Sum ← Product (('+' / '-') Product)*
|
||||
* Expr ← Sum
|
||||
*/
|
||||
// Value ← [0-9]+ / '(' Expr ')'
|
||||
// Product ← Value (('*' / '/') Value)*
|
||||
// Sum ← Product (('+' / '-') Product)*
|
||||
// Expr ← Sum
|
||||
let parser = peg.generate([
|
||||
'Expr = Sum',
|
||||
'Sum = head:Product tail:(("+" / "-") Product)* {',
|
||||
|
@ -1507,40 +1497,39 @@ describe("generated parser behavior", function() {
|
|||
' / "(" expr:Expr ")" { return expr; }'
|
||||
].join("\n"), options);
|
||||
|
||||
/* The "value" rule */
|
||||
// The "value" rule
|
||||
expect(parser).toParse("0", 0);
|
||||
expect(parser).toParse("123", 123);
|
||||
expect(parser).toParse("(42+43)", 42+43);
|
||||
|
||||
/* The "product" rule */
|
||||
// The "product" rule
|
||||
expect(parser).toParse("42", 42);
|
||||
expect(parser).toParse("42*43", 42*43);
|
||||
expect(parser).toParse("42*43*44*45", 42*43*44*45);
|
||||
expect(parser).toParse("42/43", 42/43);
|
||||
expect(parser).toParse("42/43/44/45", 42/43/44/45);
|
||||
|
||||
/* The "sum" rule */
|
||||
// The "sum" rule
|
||||
expect(parser).toParse("42*43", 42*43);
|
||||
expect(parser).toParse("42*43+44*45", 42*43+44*45);
|
||||
expect(parser).toParse("42*43+44*45+46*47+48*49", 42*43+44*45+46*47+48*49);
|
||||
expect(parser).toParse("42*43-44*45", 42*43-44*45);
|
||||
expect(parser).toParse("42*43-44*45-46*47-48*49", 42*43-44*45-46*47-48*49);
|
||||
|
||||
/* The "expr" rule */
|
||||
// The "expr" rule
|
||||
expect(parser).toParse("42+43", 42+43);
|
||||
|
||||
/* Complex test */
|
||||
// Complex test
|
||||
expect(parser).toParse("(1+2)*(3+4)", (1+2)*(3+4));
|
||||
});
|
||||
|
||||
it("handles non-context-free language correctly", function() {
|
||||
/* The following parsing expression grammar describes the classic
|
||||
* non-context-free language { a^n b^n c^n : n >= 1 }:
|
||||
*
|
||||
* S ← &(A c) a+ B !(a/b/c)
|
||||
* A ← a A? b
|
||||
* B ← b B? c
|
||||
*/
|
||||
// The following parsing expression grammar describes the classic
|
||||
// non-context-free language { a^n b^n c^n : n >= 1 }:
|
||||
//
|
||||
// S ← &(A c) a+ B !(a/b/c)
|
||||
// A ← a A? b
|
||||
// B ← b B? c
|
||||
let parser = peg.generate([
|
||||
'S = &(A "c") a:"a"+ B:B !("a" / "b" / "c") { return a.join("") + B; }',
|
||||
'A = a:"a" A:A? b:"b" { return [a, A, b].join(""); }',
|
||||
|
@ -1558,13 +1547,11 @@ describe("generated parser behavior", function() {
|
|||
});
|
||||
|
||||
it("handles nested comments example correctly", function() {
|
||||
/*
|
||||
* Begin ← "(*"
|
||||
* End ← "*)"
|
||||
* C ← Begin N* End
|
||||
* N ← C / (!Begin !End Z)
|
||||
* Z ← any single character
|
||||
*/
|
||||
// Begin ← "(*"
|
||||
// End ← "*)"
|
||||
// C ← Begin N* End
|
||||
// N ← C / (!Begin !End Z)
|
||||
// Z ← any single character
|
||||
let parser = peg.generate([
|
||||
'C = begin:Begin ns:N* end:End { return begin + ns.join("") + end; }',
|
||||
'N = C',
|
||||
|
|
|
@ -5,11 +5,9 @@
|
|||
|
||||
"use strict";
|
||||
|
||||
/*
|
||||
* Small server whose main purpose is to ensure that both the specced code and
|
||||
* the specs get passed through Babel & Browserify before they are served to the
|
||||
* browser.
|
||||
*/
|
||||
// Small server whose main purpose is to ensure that both the specced code and
|
||||
// the specs get passed through Babel & Browserify before they are served to the
|
||||
// browser.
|
||||
|
||||
let express = require("express");
|
||||
let logger = require("morgan");
|
||||
|
|
|
@ -39,7 +39,7 @@ describe("compiler pass |reportInfiniteRecursion|", function() {
|
|||
expect(pass).not.toReportError('start = "" "" "a" start');
|
||||
});
|
||||
|
||||
/* Regression test for #359. */
|
||||
// Regression test for #359.
|
||||
it("reports left recursion when rule reference is wrapped in an expression", function() {
|
||||
expect(pass).toReportError('start = "" start?');
|
||||
});
|
||||
|
|
|
@ -236,7 +236,7 @@ describe("PEG.js grammar parser", function() {
|
|||
});
|
||||
});
|
||||
|
||||
/* Canonical Grammar is "a = \"abcd\"; b = \"efgh\"; c = \"ijkl\";". */
|
||||
// Canonical Grammar is "a = \"abcd\"; b = \"efgh\"; c = \"ijkl\";".
|
||||
it("parses Grammar", function() {
|
||||
expect('\na = "abcd";\n').toParseAs(
|
||||
{ type: "grammar", initializer: null, rules: [ruleA] }
|
||||
|
@ -249,14 +249,14 @@ describe("PEG.js grammar parser", function() {
|
|||
);
|
||||
});
|
||||
|
||||
/* Canonical Initializer is "{ code }". */
|
||||
// Canonical Initializer is "{ code }".
|
||||
it("parses Initializer", function() {
|
||||
expect('{ code };start = "abcd"').toParseAs(
|
||||
{ type: "grammar", initializer: initializer, rules: [ruleStart] }
|
||||
);
|
||||
});
|
||||
|
||||
/* Canonical Rule is "a = \"abcd\";". */
|
||||
// Canonical Rule is "a = \"abcd\";".
|
||||
it("parses Rule", function() {
|
||||
expect('start\n=\n"abcd";').toParseAs(
|
||||
oneRuleGrammar(literalAbcd)
|
||||
|
@ -266,14 +266,14 @@ describe("PEG.js grammar parser", function() {
|
|||
);
|
||||
});
|
||||
|
||||
/* Canonical Expression is "\"abcd\"". */
|
||||
// Canonical Expression is "\"abcd\"".
|
||||
it("parses Expression", function() {
|
||||
expect('start = "abcd" / "efgh" / "ijkl"').toParseAs(
|
||||
oneRuleGrammar(choice)
|
||||
);
|
||||
});
|
||||
|
||||
/* Canonical ChoiceExpression is "\"abcd\" / \"efgh\" / \"ijkl\"". */
|
||||
// Canonical ChoiceExpression is "\"abcd\" / \"efgh\" / \"ijkl\"".
|
||||
it("parses ChoiceExpression", function() {
|
||||
expect('start = "abcd" { code }').toParseAs(
|
||||
oneRuleGrammar(actionAbcd)
|
||||
|
@ -288,7 +288,7 @@ describe("PEG.js grammar parser", function() {
|
|||
);
|
||||
});
|
||||
|
||||
/* Canonical ActionExpression is "\"abcd\" { code }". */
|
||||
// Canonical ActionExpression is "\"abcd\" { code }".
|
||||
it("parses ActionExpression", function() {
|
||||
expect('start = "abcd" "efgh" "ijkl"').toParseAs(
|
||||
oneRuleGrammar(sequence)
|
||||
|
@ -298,7 +298,7 @@ describe("PEG.js grammar parser", function() {
|
|||
);
|
||||
});
|
||||
|
||||
/* Canonical SequenceExpression is "\"abcd\" \"efgh\" \"ijkl\"". */
|
||||
// Canonical SequenceExpression is "\"abcd\" \"efgh\" \"ijkl\"".
|
||||
it("parses SequenceExpression", function() {
|
||||
expect('start = a:"abcd"').toParseAs(
|
||||
oneRuleGrammar(labeledAbcd)
|
||||
|
@ -311,39 +311,39 @@ describe("PEG.js grammar parser", function() {
|
|||
);
|
||||
});
|
||||
|
||||
/* Canonical LabeledExpression is "a:\"abcd\"". */
|
||||
// Canonical LabeledExpression is "a:\"abcd\"".
|
||||
it("parses LabeledExpression", function() {
|
||||
expect('start = a\n:\n!"abcd"').toParseAs(oneRuleGrammar(labeledSimpleNot));
|
||||
expect('start = !"abcd"' ).toParseAs(oneRuleGrammar(simpleNotAbcd));
|
||||
});
|
||||
|
||||
/* Canonical PrefixedExpression is "!\"abcd\"". */
|
||||
// Canonical PrefixedExpression is "!\"abcd\"".
|
||||
it("parses PrefixedExpression", function() {
|
||||
expect('start = !\n"abcd"?' ).toParseAs(oneRuleGrammar(simpleNotOptional));
|
||||
expect('start = "abcd"?' ).toParseAs(oneRuleGrammar(optional));
|
||||
});
|
||||
|
||||
/* Canonical PrefixedOperator is "!". */
|
||||
// Canonical PrefixedOperator is "!".
|
||||
it("parses PrefixedOperator", function() {
|
||||
expect('start = $"abcd"?').toParseAs(oneRuleGrammar(textOptional));
|
||||
expect('start = &"abcd"?').toParseAs(oneRuleGrammar(simpleAndOptional));
|
||||
expect('start = !"abcd"?').toParseAs(oneRuleGrammar(simpleNotOptional));
|
||||
});
|
||||
|
||||
/* Canonical SuffixedExpression is "\"abcd\"?". */
|
||||
// Canonical SuffixedExpression is "\"abcd\"?".
|
||||
it("parses SuffixedExpression", function() {
|
||||
expect('start = "abcd"\n?').toParseAs(oneRuleGrammar(optional));
|
||||
expect('start = "abcd"' ).toParseAs(oneRuleGrammar(literalAbcd));
|
||||
});
|
||||
|
||||
/* Canonical SuffixedOperator is "?". */
|
||||
// Canonical SuffixedOperator is "?".
|
||||
it("parses SuffixedOperator", function() {
|
||||
expect('start = "abcd"?').toParseAs(oneRuleGrammar(optional));
|
||||
expect('start = "abcd"*').toParseAs(oneRuleGrammar(zeroOrMore));
|
||||
expect('start = "abcd"+').toParseAs(oneRuleGrammar(oneOrMore));
|
||||
});
|
||||
|
||||
/* Canonical PrimaryExpression is "\"abcd\"". */
|
||||
// Canonical PrimaryExpression is "\"abcd\"".
|
||||
it("parses PrimaryExpression", function() {
|
||||
expect('start = "abcd"' ).toParseAs(trivialGrammar);
|
||||
expect('start = [a-d]' ).toParseAs(classGrammar([["a", "d"]], false, false));
|
||||
|
@ -356,7 +356,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = (\n"abcd"\n)' ).toParseAs(trivialGrammar);
|
||||
});
|
||||
|
||||
/* Canonical RuleReferenceExpression is "a". */
|
||||
// Canonical RuleReferenceExpression is "a".
|
||||
it("parses RuleReferenceExpression", function() {
|
||||
expect('start = a').toParseAs(ruleRefGrammar("a"));
|
||||
|
||||
|
@ -364,20 +364,20 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = a\n"abcd"\n=').toFailToParse();
|
||||
});
|
||||
|
||||
/* Canonical SemanticPredicateExpression is "!{ code }". */
|
||||
// Canonical SemanticPredicateExpression is "!{ code }".
|
||||
it("parses SemanticPredicateExpression", function() {
|
||||
expect('start = !\n{ code }').toParseAs(oneRuleGrammar(semanticNot));
|
||||
});
|
||||
|
||||
/* Canonical SemanticPredicateOperator is "!". */
|
||||
// Canonical SemanticPredicateOperator is "!".
|
||||
it("parses SemanticPredicateOperator", function() {
|
||||
expect('start = &{ code }').toParseAs(oneRuleGrammar(semanticAnd));
|
||||
expect('start = !{ code }').toParseAs(oneRuleGrammar(semanticNot));
|
||||
});
|
||||
|
||||
/* The SourceCharacter rule is not tested. */
|
||||
// The SourceCharacter rule is not tested.
|
||||
|
||||
/* Canonical WhiteSpace is " ". */
|
||||
// Canonical WhiteSpace is " ".
|
||||
it("parses WhiteSpace", function() {
|
||||
expect('start =\t"abcd"' ).toParseAs(trivialGrammar);
|
||||
expect('start =\v"abcd"' ).toParseAs(trivialGrammar);
|
||||
|
@ -388,7 +388,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start =\u1680"abcd"').toParseAs(trivialGrammar);
|
||||
});
|
||||
|
||||
/* Canonical LineTerminator is "\n". */
|
||||
// Canonical LineTerminator is "\n".
|
||||
it("parses LineTerminator", function() {
|
||||
expect('start = "\n"' ).toFailToParse();
|
||||
expect('start = "\r"' ).toFailToParse();
|
||||
|
@ -396,7 +396,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = "\u2029"').toFailToParse();
|
||||
});
|
||||
|
||||
/* Canonical LineTerminatorSequence is "\r\n". */
|
||||
// Canonical LineTerminatorSequence is "\r\n".
|
||||
it("parses LineTerminatorSequence", function() {
|
||||
expect('start =\n"abcd"' ).toParseAs(trivialGrammar);
|
||||
expect('start =\r\n"abcd"' ).toParseAs(trivialGrammar);
|
||||
|
@ -430,7 +430,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('a = "abcd"/*\n*/\r\nb = "efgh"').toFailToParse();
|
||||
});
|
||||
|
||||
/* Canonical SingleLineComment is "// comment". */
|
||||
// Canonical SingleLineComment is "// comment".
|
||||
it("parses SingleLineComment", function() {
|
||||
expect('start =//\n"abcd"' ).toParseAs(trivialGrammar);
|
||||
expect('start =//a\n"abcd"' ).toParseAs(trivialGrammar);
|
||||
|
@ -439,19 +439,19 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start =//\n@\n"abcd"').toFailToParse();
|
||||
});
|
||||
|
||||
/* Canonical Identifier is "a". */
|
||||
// Canonical Identifier is "a".
|
||||
it("parses Identifier", function() {
|
||||
expect('start = a:"abcd"').toParseAs(oneRuleGrammar(labeledAbcd));
|
||||
});
|
||||
|
||||
/* Canonical IdentifierName is "a". */
|
||||
// Canonical IdentifierName is "a".
|
||||
it("parses IdentifierName", function() {
|
||||
expect('start = a' ).toParseAs(ruleRefGrammar("a"));
|
||||
expect('start = ab' ).toParseAs(ruleRefGrammar("ab"));
|
||||
expect('start = abcd').toParseAs(ruleRefGrammar("abcd"));
|
||||
});
|
||||
|
||||
/* Canonical IdentifierStart is "a". */
|
||||
// Canonical IdentifierStart is "a".
|
||||
it("parses IdentifierStart", function() {
|
||||
expect('start = a' ).toParseAs(ruleRefGrammar("a"));
|
||||
expect('start = $' ).toParseAs(ruleRefGrammar("$"));
|
||||
|
@ -459,7 +459,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = \\u0061').toParseAs(ruleRefGrammar("a"));
|
||||
});
|
||||
|
||||
/* Canonical IdentifierPart is "a". */
|
||||
// Canonical IdentifierPart is "a".
|
||||
it("parses IdentifierPart", function() {
|
||||
expect('start = aa' ).toParseAs(ruleRefGrammar("aa"));
|
||||
expect('start = a\u0300').toParseAs(ruleRefGrammar("a\u0300"));
|
||||
|
@ -469,15 +469,15 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = a\u200D').toParseAs(ruleRefGrammar("a\u200D"));
|
||||
});
|
||||
|
||||
/* Unicode rules and reserved word rules are not tested. */
|
||||
// Unicode rules and reserved word rules are not tested.
|
||||
|
||||
/* Canonical LiteralMatcher is "\"abcd\"". */
|
||||
// Canonical LiteralMatcher is "\"abcd\"".
|
||||
it("parses LiteralMatcher", function() {
|
||||
expect('start = "abcd"' ).toParseAs(literalGrammar("abcd", false));
|
||||
expect('start = "abcd"i').toParseAs(literalGrammar("abcd", true));
|
||||
});
|
||||
|
||||
/* Canonical StringLiteral is "\"abcd\"". */
|
||||
// Canonical StringLiteral is "\"abcd\"".
|
||||
it("parses StringLiteral", function() {
|
||||
expect('start = ""' ).toParseAs(literalGrammar("", false));
|
||||
expect('start = "a"' ).toParseAs(literalGrammar("a", false));
|
||||
|
@ -488,7 +488,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect("start = 'abc'").toParseAs(literalGrammar("abc", false));
|
||||
});
|
||||
|
||||
/* Canonical DoubleStringCharacter is "a". */
|
||||
// Canonical DoubleStringCharacter is "a".
|
||||
it("parses DoubleStringCharacter", function() {
|
||||
expect('start = "a"' ).toParseAs(literalGrammar("a", false));
|
||||
expect('start = "\\n"' ).toParseAs(literalGrammar("\n", false));
|
||||
|
@ -499,7 +499,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = "\n"').toFailToParse();
|
||||
});
|
||||
|
||||
/* Canonical SingleStringCharacter is "a". */
|
||||
// Canonical SingleStringCharacter is "a".
|
||||
it("parses SingleStringCharacter", function() {
|
||||
expect("start = 'a'" ).toParseAs(literalGrammar("a", false));
|
||||
expect("start = '\\n'" ).toParseAs(literalGrammar("\n", false));
|
||||
|
@ -510,7 +510,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect("start = '\n'").toFailToParse();
|
||||
});
|
||||
|
||||
/* Canonical CharacterClassMatcher is "[a-d]". */
|
||||
// Canonical CharacterClassMatcher is "[a-d]".
|
||||
it("parses CharacterClassMatcher", function() {
|
||||
expect('start = []').toParseAs(
|
||||
classGrammar([], false, false)
|
||||
|
@ -540,7 +540,7 @@ describe("PEG.js grammar parser", function() {
|
|||
);
|
||||
});
|
||||
|
||||
/* Canonical ClassCharacterRange is "a-d". */
|
||||
// Canonical ClassCharacterRange is "a-d".
|
||||
it("parses ClassCharacterRange", function() {
|
||||
expect('start = [a-d]').toParseAs(classGrammar([["a", "d"]], false, false));
|
||||
|
||||
|
@ -550,7 +550,7 @@ describe("PEG.js grammar parser", function() {
|
|||
});
|
||||
});
|
||||
|
||||
/* Canonical ClassCharacter is "a". */
|
||||
// Canonical ClassCharacter is "a".
|
||||
it("parses ClassCharacter", function() {
|
||||
expect('start = [a]' ).toParseAs(classGrammar(["a"], false, false));
|
||||
expect('start = [\\n]' ).toParseAs(classGrammar(["\n"], false, false));
|
||||
|
@ -561,12 +561,12 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = [\n]').toFailToParse();
|
||||
});
|
||||
|
||||
/* Canonical LineContinuation is "\\\n". */
|
||||
// Canonical LineContinuation is "\\\n".
|
||||
it("parses LineContinuation", function() {
|
||||
expect('start = "\\\r\n"').toParseAs(literalGrammar("", false));
|
||||
});
|
||||
|
||||
/* Canonical EscapeSequence is "n". */
|
||||
// Canonical EscapeSequence is "n".
|
||||
it("parses EscapeSequence", function() {
|
||||
expect('start = "\\n"' ).toParseAs(literalGrammar("\n", false));
|
||||
expect('start = "\\0"' ).toParseAs(literalGrammar("\x00", false));
|
||||
|
@ -576,13 +576,13 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = "\\09"').toFailToParse();
|
||||
});
|
||||
|
||||
/* Canonical CharacterEscapeSequence is "n". */
|
||||
// Canonical CharacterEscapeSequence is "n".
|
||||
it("parses CharacterEscapeSequence", function() {
|
||||
expect('start = "\\n"').toParseAs(literalGrammar("\n", false));
|
||||
expect('start = "\\a"').toParseAs(literalGrammar("a", false));
|
||||
});
|
||||
|
||||
/* Canonical SingleEscapeCharacter is "n". */
|
||||
// Canonical SingleEscapeCharacter is "n".
|
||||
it("parses SingleEscapeCharacter", function() {
|
||||
expect('start = "\\\'"').toParseAs(literalGrammar("'", false));
|
||||
expect('start = "\\""' ).toParseAs(literalGrammar('"', false));
|
||||
|
@ -595,44 +595,40 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = "\\v"' ).toParseAs(literalGrammar("\v", false));
|
||||
});
|
||||
|
||||
/* Canonical NonEscapeCharacter is "a". */
|
||||
// Canonical NonEscapeCharacter is "a".
|
||||
it("parses NonEscapeCharacter", function() {
|
||||
expect('start = "\\a"').toParseAs(literalGrammar("a", false));
|
||||
|
||||
/*
|
||||
* The negative predicate is impossible to test with PEG.js grammar
|
||||
* structure.
|
||||
*/
|
||||
// The negative predicate is impossible to test with PEG.js grammar
|
||||
// structure.
|
||||
});
|
||||
|
||||
/*
|
||||
* The EscapeCharacter rule is impossible to test with PEG.js grammar
|
||||
* structure.
|
||||
*/
|
||||
// The EscapeCharacter rule is impossible to test with PEG.js grammar
|
||||
// structure.
|
||||
|
||||
/* Canonical HexEscapeSequence is "xFF". */
|
||||
// Canonical HexEscapeSequence is "xFF".
|
||||
it("parses HexEscapeSequence", function() {
|
||||
expect('start = "\\xFF"').toParseAs(literalGrammar("\xFF", false));
|
||||
});
|
||||
|
||||
/* Canonical UnicodeEscapeSequence is "uFFFF". */
|
||||
// Canonical UnicodeEscapeSequence is "uFFFF".
|
||||
it("parses UnicodeEscapeSequence", function() {
|
||||
expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF", false));
|
||||
});
|
||||
|
||||
/* Digit rules are not tested. */
|
||||
// Digit rules are not tested.
|
||||
|
||||
/* Canonical AnyMatcher is ".". */
|
||||
// Canonical AnyMatcher is ".".
|
||||
it("parses AnyMatcher", function() {
|
||||
expect('start = .').toParseAs(anyGrammar());
|
||||
});
|
||||
|
||||
/* Canonical CodeBlock is "{ code }". */
|
||||
// Canonical CodeBlock is "{ code }".
|
||||
it("parses CodeBlock", function() {
|
||||
expect('start = "abcd" { code }').toParseAs(actionGrammar(" code "));
|
||||
});
|
||||
|
||||
/* Canonical Code is " code ". */
|
||||
// Canonical Code is " code ".
|
||||
it("parses Code", function() {
|
||||
expect('start = "abcd" {a}' ).toParseAs(actionGrammar("a"));
|
||||
expect('start = "abcd" {abc}' ).toParseAs(actionGrammar("abc"));
|
||||
|
@ -643,9 +639,9 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = "abcd" {}}').toFailToParse();
|
||||
});
|
||||
|
||||
/* Unicode character category rules and token rules are not tested. */
|
||||
// Unicode character category rules and token rules are not tested.
|
||||
|
||||
/* Canonical __ is "\n". */
|
||||
// Canonical __ is "\n".
|
||||
it("parses __", function() {
|
||||
expect('start ="abcd"' ).toParseAs(trivialGrammar);
|
||||
expect('start = "abcd"' ).toParseAs(trivialGrammar);
|
||||
|
@ -654,7 +650,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('start = "abcd"' ).toParseAs(trivialGrammar);
|
||||
});
|
||||
|
||||
/* Canonical _ is " ". */
|
||||
// Canonical _ is " ".
|
||||
it("parses _", function() {
|
||||
expect('a = "abcd"\r\nb = "efgh"' ).toParseAs(twoRuleGrammar);
|
||||
expect('a = "abcd" \r\nb = "efgh"' ).toParseAs(twoRuleGrammar);
|
||||
|
@ -662,7 +658,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('a = "abcd" \r\nb = "efgh"' ).toParseAs(twoRuleGrammar);
|
||||
});
|
||||
|
||||
/* Canonical EOS is ";". */
|
||||
// Canonical EOS is ";".
|
||||
it("parses EOS", function() {
|
||||
expect('a = "abcd"\n;b = "efgh"' ).toParseAs(twoRuleGrammar);
|
||||
expect('a = "abcd" \r\nb = "efgh"' ).toParseAs(twoRuleGrammar);
|
||||
|
@ -670,7 +666,7 @@ describe("PEG.js grammar parser", function() {
|
|||
expect('a = "abcd"\nb = "efgh"' ).toParseAs(twoRuleGrammar);
|
||||
});
|
||||
|
||||
/* Canonical EOF is the end of input. */
|
||||
// Canonical EOF is the end of input.
|
||||
it("parses EOF", function() {
|
||||
expect('start = "abcd"\n').toParseAs(trivialGrammar);
|
||||
});
|
||||
|
|
112
src/parser.pegjs
112
src/parser.pegjs
|
@ -1,27 +1,25 @@
|
|||
/*
|
||||
* PEG.js Grammar
|
||||
* ==============
|
||||
*
|
||||
* PEG.js grammar syntax is designed to be simple, expressive, and similar to
|
||||
* JavaScript where possible. This means that many rules, especially in the
|
||||
* lexical part, are based on the grammar from ECMA-262, 5.1 Edition [1]. Some
|
||||
* are directly taken or adapted from the JavaScript example grammar (see
|
||||
* examples/javascript.pegjs).
|
||||
*
|
||||
* Limitations:
|
||||
*
|
||||
* * Non-BMP characters are completely ignored to avoid surrogate pair
|
||||
* handling.
|
||||
*
|
||||
* * One can create identifiers containing illegal characters using Unicode
|
||||
* escape sequences. For example, "abcd\u0020efgh" is not a valid
|
||||
* identifier, but it is accepted by the parser.
|
||||
*
|
||||
* Both limitations could be resolved, but the costs would likely outweigh
|
||||
* the benefits.
|
||||
*
|
||||
* [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
||||
*/
|
||||
// PEG.js Grammar
|
||||
// ==============
|
||||
//
|
||||
// PEG.js grammar syntax is designed to be simple, expressive, and similar to
|
||||
// JavaScript where possible. This means that many rules, especially in the
|
||||
// lexical part, are based on the grammar from ECMA-262, 5.1 Edition [1]. Some
|
||||
// are directly taken or adapted from the JavaScript example grammar (see
|
||||
// examples/javascript.pegjs).
|
||||
//
|
||||
// Limitations:
|
||||
//
|
||||
// * Non-BMP characters are completely ignored to avoid surrogate pair
|
||||
// handling.
|
||||
//
|
||||
// * One can create identifiers containing illegal characters using Unicode
|
||||
// escape sequences. For example, "abcd\u0020efgh" is not a valid
|
||||
// identifier, but it is accepted by the parser.
|
||||
//
|
||||
// Both limitations could be resolved, but the costs would likely outweigh
|
||||
// the benefits.
|
||||
//
|
||||
// [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
|
||||
|
||||
{
|
||||
const OPS_TO_PREFIXED_TYPES = {
|
||||
|
@ -54,7 +52,7 @@
|
|||
}
|
||||
}
|
||||
|
||||
/* ---- Syntactic Grammar ----- */
|
||||
// ---- Syntactic Grammar -----
|
||||
|
||||
Grammar
|
||||
= __ initializer:(Initializer __)? rules:(Rule __)+ {
|
||||
|
@ -177,12 +175,10 @@ PrimaryExpression
|
|||
/ RuleReferenceExpression
|
||||
/ SemanticPredicateExpression
|
||||
/ "(" __ expression:Expression __ ")" {
|
||||
/*
|
||||
* The purpose of the "group" AST node is just to isolate label scope. We
|
||||
* don't need to put it around nodes that can't contain any labels or
|
||||
* nodes that already isolate label scope themselves. This leaves us with
|
||||
* "labeled" and "sequence".
|
||||
*/
|
||||
// The purpose of the "group" AST node is just to isolate label scope. We
|
||||
// don't need to put it around nodes that can't contain any labels or
|
||||
// nodes that already isolate label scope themselves. This leaves us with
|
||||
// "labeled" and "sequence".
|
||||
return expression.type === 'labeled' || expression.type === 'sequence'
|
||||
? { type: "group", expression: expression }
|
||||
: expression;
|
||||
|
@ -206,7 +202,7 @@ SemanticPredicateOperator
|
|||
= "&"
|
||||
/ "!"
|
||||
|
||||
/* ---- Lexical Grammar ----- */
|
||||
// ---- Lexical Grammar -----
|
||||
|
||||
SourceCharacter
|
||||
= .
|
||||
|
@ -445,30 +441,28 @@ CodeBlock "code block"
|
|||
Code
|
||||
= $((![{}] SourceCharacter)+ / "{" Code "}")*
|
||||
|
||||
/*
|
||||
* Unicode Character Categories
|
||||
*
|
||||
* Extracted from the following Unicode Character Database file:
|
||||
*
|
||||
* http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt
|
||||
*
|
||||
* Unix magic used:
|
||||
*
|
||||
* grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters
|
||||
* cut -f1 -d " " | # Extract code points
|
||||
* grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters
|
||||
* sed -e 's/\.\./-/' | # Adjust formatting
|
||||
* sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting
|
||||
* tr -d '\n' # Join lines
|
||||
*
|
||||
* ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one
|
||||
* at the time of writing.
|
||||
*
|
||||
* Non-BMP characters are completely ignored to avoid surrogate pair handling
|
||||
* (detecting surrogate pairs isn't possible with a simple character class and
|
||||
* other methods would degrade performance). I don't consider it a big deal as
|
||||
* even parsers in JavaScript engines of common browsers seem to ignore them.
|
||||
*/
|
||||
// Unicode Character Categories
|
||||
//
|
||||
// Extracted from the following Unicode Character Database file:
|
||||
//
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt
|
||||
//
|
||||
// Unix magic used:
|
||||
//
|
||||
// grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters
|
||||
// cut -f1 -d " " | # Extract code points
|
||||
// grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters
|
||||
// sed -e 's/\.\./-/' | # Adjust formatting
|
||||
// sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting
|
||||
// tr -d '\n' # Join lines
|
||||
//
|
||||
// ECMA-262 allows using Unicode 3.0 or later; version 8.0.0 was the latest one
|
||||
// at the time of writing.
|
||||
//
|
||||
// Non-BMP characters are completely ignored to avoid surrogate pair handling
|
||||
// (detecting surrogate pairs isn't possible with a simple character class and
|
||||
// other methods would degrade performance). I don't consider it a big deal as
|
||||
// even parsers in JavaScript engines of common browsers seem to ignore them.
|
||||
|
||||
// Letter, Lowercase
|
||||
Ll = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0561-\u0587\u13F8-\u13FD\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u
1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7B5\uA7B7\
uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A]
|
||||
|
@ -503,7 +497,7 @@ Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F]
|
|||
// Separator, Space
|
||||
Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]
|
||||
|
||||
/* Tokens */
|
||||
// Tokens
|
||||
|
||||
BreakToken = "break" !IdentifierPart
|
||||
CaseToken = "case" !IdentifierPart
|
||||
|
@ -542,7 +536,7 @@ VoidToken = "void" !IdentifierPart
|
|||
WhileToken = "while" !IdentifierPart
|
||||
WithToken = "with" !IdentifierPart
|
||||
|
||||
/* Skipped */
|
||||
// Skipped
|
||||
|
||||
__
|
||||
= (WhiteSpace / LineTerminatorSequence / Comment)*
|
||||
|
@ -550,7 +544,7 @@ __
|
|||
_
|
||||
= (WhiteSpace / MultiLineCommentNoLineTerminator)*
|
||||
|
||||
/* Automatic Semicolon Insertion */
|
||||
// Automatic Semicolon Insertion
|
||||
|
||||
EOS
|
||||
= __ ";"
|
||||
|
|
Loading…
Reference in a new issue