diff --git a/Makefile b/Makefile index f03570b..f71e5f7 100644 --- a/Makefile +++ b/Makefile @@ -68,14 +68,12 @@ browser: rm -f $(BROWSER_FILE_DEV) rm -f $(BROWSER_FILE_MIN) - echo '/*' >> $(BROWSER_FILE_DEV) - echo " * PEG.js $(PEGJS_VERSION)" >> $(BROWSER_FILE_DEV) - echo ' *' >> $(BROWSER_FILE_DEV) - echo ' * http://pegjs.org/' >> $(BROWSER_FILE_DEV) - echo ' *' >> $(BROWSER_FILE_DEV) - echo ' * Copyright (c) 2010-2016 David Majda' >> $(BROWSER_FILE_DEV) - echo ' * Licensed under the MIT license.' >> $(BROWSER_FILE_DEV) - echo ' */' >> $(BROWSER_FILE_DEV) + echo "// PEG.js $(PEGJS_VERSION)" >> $(BROWSER_FILE_DEV) + echo '//' >> $(BROWSER_FILE_DEV) + echo '// http://pegjs.org/' >> $(BROWSER_FILE_DEV) + echo '//' >> $(BROWSER_FILE_DEV) + echo '// Copyright (c) 2010-2016 David Majda' >> $(BROWSER_FILE_DEV) + echo '// Licensed under the MIT license.' >> $(BROWSER_FILE_DEV) $(BROWSERIFY) \ --standalone peg \ diff --git a/benchmark/index.js b/benchmark/index.js index 9467b85..81ce776 100644 --- a/benchmark/index.js +++ b/benchmark/index.js @@ -6,7 +6,7 @@ let Runner = require("./runner.js"); $("#run").click(() => { "use strict"; - /* Results Table Manipulation */ + // Results Table Manipulation let resultsTable = $("#results-table"); @@ -49,18 +49,16 @@ $("#run").click(() => { ); } - /* Main */ + // Main - /* - * Each input is parsed multiple times and the results are averaged. We - * do this for two reasons: - * - * 1. To warm up the interpreter (PEG.js-generated parsers will be - * most likely used repeatedly, so it makes sense to measure - * performance after warming up). - * - * 2. To minimize random errors. - */ + // Each input is parsed multiple times and the results are averaged. We + // do this for two reasons: + // + // 1. To warm up the interpreter (PEG.js-generated parsers will be + // most likely used repeatedly, so it makes sense to measure + // performance after warming up). + // + // 2. To minimize random errors. 
let runCount = parseInt($("#run-count").val(), 10); let options = { @@ -84,7 +82,7 @@ $("#run").click(() => { }, testStart: function() { - /* Nothing to do. */ + // Nothing to do. }, testFinish: function(benchmark, test, inputSize, parseTime) { diff --git a/benchmark/run b/benchmark/run index ee67203..7384541 100755 --- a/benchmark/run +++ b/benchmark/run @@ -10,7 +10,7 @@ let fs = require("fs"); let benchmarks = require("./benchmarks.js"); let Runner = require("./runner.js"); -/* Results Table Manipulation */ +// Results Table Manipulation function dup(text, count) { let result = ""; @@ -70,7 +70,7 @@ function writeTableFooter() { console.log("└─────────────────────────────────────┴───────────┴────────────┴──────────────┘"); } -/* Helpers */ +// Helpers function printHelp() { console.log("Usage: run [options]"); @@ -97,7 +97,7 @@ function abort(message) { exitFailure(); } -/* Arguments */ +// Arguments let args = process.argv.slice(2); // Trim "node" and the script path. @@ -109,7 +109,7 @@ function nextArg() { args.shift(); } -/* Main */ +// Main let runCount = 10; let options = { @@ -169,7 +169,7 @@ Runner.run(benchmarks, runCount, options, { }, testStart: function() { - /* Nothing to do. */ + // Nothing to do. }, testFinish: function(benchmark, test, inputSize, parseTime) { diff --git a/benchmark/runner.js b/benchmark/runner.js index b61886e..3ca9948 100644 --- a/benchmark/runner.js +++ b/benchmark/runner.js @@ -7,7 +7,7 @@ let peg = require("../lib/peg"); let Runner = { run: function(benchmarks, runCount, options, callbacks) { - /* Queue */ + // Queue let Q = { functions: [], @@ -20,29 +20,25 @@ let Runner = { if (this.functions.length > 0) { this.functions.shift()(); - /* - * We can't use |arguments.callee| here because |this| would get - * messed-up in that case. - */ + // We can't use |arguments.callee| here because |this| would get + // messed-up in that case. 
setTimeout(() => { Q.run(); }, 0); } } }; - /* - * The benchmark itself is factored out into several functions (some of them - * generated), which are enqueued and run one by one using |setTimeout|. We - * do this for two reasons: - * - * 1. To avoid bowser mechanism for interrupting long-running scripts to - * kick-in (or at least to not kick-in that often). - * - * 2. To ensure progressive rendering of results in the browser (some - * browsers do not render at all when running JavaScript code). - * - * The enqueued functions share state, which is all stored in the properties - * of the |state| object. - */ + // The benchmark itself is factored out into several functions (some of them + // generated), which are enqueued and run one by one using |setTimeout|. We + // do this for two reasons: + // + // 1. To avoid browser mechanism for interrupting long-running scripts to + // kick-in (or at least to not kick-in that often). + // + // 2. To ensure progressive rendering of results in the browser (some + // browsers do not render at all when running JavaScript code). + // + // The enqueued functions share state, which is all stored in the properties + // of the |state| object. let state = {}; @@ -104,7 +100,7 @@ let Runner = { callbacks.finish(state.totalInputSize, state.totalParseTime); } - /* Main */ + // Main Q.add(initialize); benchmarks.forEach(benchmark => { diff --git a/benchmark/server b/benchmark/server index b2b8c22..2e3d296 100755 --- a/benchmark/server +++ b/benchmark/server @@ -5,11 +5,9 @@ "use strict"; -/* - * Small server whose main purpose is to ensure that both the benchmarked code - * and the benchmark get passed through Babel & Browserify before they are - * served to the browser. - */ +// Small server whose main purpose is to ensure that both the benchmarked code +// and the benchmark get passed through Babel & Browserify before they are +// served to the browser. 
let express = require("express"); let logger = require("morgan"); diff --git a/bin/pegjs b/bin/pegjs index f7db65a..a2a6f7c 100755 --- a/bin/pegjs +++ b/bin/pegjs @@ -6,7 +6,7 @@ let fs = require("fs"); let path = require("path"); let peg = require("../lib/peg"); -/* Helpers */ +// Helpers function printVersion() { console.log("PEG.js " + peg.VERSION); @@ -74,15 +74,13 @@ function addExtraOptions(options, json) { }); } -/* - * Extracted into a function just to silence JSHint complaining about creating - * functions in a loop. - */ +// Extracted into a function just to silence JSHint complaining about creating +// functions in a loop. function trim(s) { return s.trim(); } -/* Arguments */ +// Arguments let args = process.argv.slice(2); // Trim "node" and the script path. @@ -94,7 +92,7 @@ function nextArg() { args.shift(); } -/* Files */ +// Files function readStream(inputStream, callback) { let input = ""; @@ -102,7 +100,7 @@ function readStream(inputStream, callback) { inputStream.on("end", () => { callback(input); }); } -/* Main */ +// Main let inputFile = null; let outputFile = null; diff --git a/examples/arithmetics.pegjs b/examples/arithmetics.pegjs index 5cd36cf..ef70564 100644 --- a/examples/arithmetics.pegjs +++ b/examples/arithmetics.pegjs @@ -1,9 +1,7 @@ -/* - * Simple Arithmetics Grammar - * ========================== - * - * Accepts expressions like "2 * (3 + 4)" and computes their value. - */ +// Simple Arithmetics Grammar +// ========================== +// +// Accepts expressions like "2 * (3 + 4)" and computes their value. Expression = head:Term tail:(_ ("+" / "-") _ Term)* { diff --git a/examples/css.pegjs b/examples/css.pegjs index cbe64e7..b2d5678 100644 --- a/examples/css.pegjs +++ b/examples/css.pegjs @@ -1,23 +1,21 @@ -/* - * CSS Grammar - * =========== - * - * Based on grammar from CSS 2.1 specification [1] (including the errata [2]). 
- * Generated parser builds a syntax tree composed of nested JavaScript objects, - * vaguely inspired by CSS DOM [3]. The CSS DOM itself wasn't used as it is not - * expressive enough (e.g. selectors are reflected as text, not structured - * objects) and somewhat cumbersome. - * - * Limitations: - * - * * Many errors which should be recovered from according to the specification - * (e.g. malformed declarations or unexpected end of stylesheet) are fatal. - * This is a result of straightforward rewrite of the CSS grammar to PEG.js. - * - * [1] http://www.w3.org/TR/2011/REC-CSS2-20110607 - * [2] http://www.w3.org/Style/css2-updates/REC-CSS2-20110607-errata.html - * [3] http://www.w3.org/TR/DOM-Level-2-Style/css.html - */ +// CSS Grammar +// =========== +// +// Based on grammar from CSS 2.1 specification [1] (including the errata [2]). +// Generated parser builds a syntax tree composed of nested JavaScript objects, +// vaguely inspired by CSS DOM [3]. The CSS DOM itself wasn't used as it is not +// expressive enough (e.g. selectors are reflected as text, not structured +// objects) and somewhat cumbersome. +// +// Limitations: +// +// * Many errors which should be recovered from according to the specification +// (e.g. malformed declarations or unexpected end of stylesheet) are fatal. +// This is a result of straightforward rewrite of the CSS grammar to PEG.js. +// +// [1] http://www.w3.org/TR/2011/REC-CSS2-20110607 +// [2] http://www.w3.org/Style/css2-updates/REC-CSS2-20110607-errata.html +// [3] http://www.w3.org/TR/DOM-Level-2-Style/css.html { function extractOptional(optional, index) { @@ -49,7 +47,7 @@ start = stylesheet:stylesheet comment* { return stylesheet; } -/* ----- G.1 Grammar ----- */ +// ----- G.1 Grammar ----- stylesheet = charset:(CHARSET_SYM STRING ";")? 
(S / CDO / CDC)* @@ -244,9 +242,9 @@ function hexcolor = value:HASH S* { return { type: "Hexcolor", value: value }; } -/* ----- G.2 Lexical scanner ----- */ +// ----- G.2 Lexical scanner ----- -/* Macros */ +// Macros h = [0-9a-f]i @@ -338,7 +336,7 @@ U = "u"i / "\\" "0"? "0"? "0"? "0"? [\x55\x75] ("\r\n" / [ \t\r\n\f])? / "\\u"i X = "x"i / "\\" "0"? "0"? "0"? "0"? [\x58\x78] ("\r\n" / [ \t\r\n\f])? / "\\x"i { return "x"; } Z = "z"i / "\\" "0"? "0"? "0"? "0"? [\x5a\x7a] ("\r\n" / [ \t\r\n\f])? / "\\z"i { return "z"; } -/* Tokens */ +// Tokens S "whitespace" = comment* s @@ -376,7 +374,7 @@ MEDIA_SYM "@media" CHARSET_SYM "@charset" = comment* "@charset " -/* We use |s| instead of |w| here to avoid infinite recursion. */ +// We use |s| instead of |w| here to avoid infinite recursion. IMPORTANT_SYM "!important" = comment* "!" (s / comment)* I M P O R T A N T diff --git a/examples/javascript.pegjs b/examples/javascript.pegjs index 35be384..2a76fa0 100644 --- a/examples/javascript.pegjs +++ b/examples/javascript.pegjs @@ -1,36 +1,34 @@ -/* - * JavaScript Grammar - * ================== - * - * Based on grammar from ECMA-262, 5.1 Edition [1]. Generated parser builds a - * syntax tree compatible with the ESTree spec [2]. - * - * Limitations: - * - * * Non-BMP characters are completely ignored to avoid surrogate pair - * handling. - * - * * One can create identifiers containing illegal characters using Unicode - * escape sequences. For example, "abcd\u0020efgh" is not a valid - * identifier, but it is accepted by the parser. - * - * * Strict mode is not recognized. This means that within strict mode code, - * "implements", "interface", "let", "package", "private", "protected", - * "public", "static" and "yield" can be used as names. Many other - * restrictions and exceptions from Annex C are also not applied. - * - * All the limitations could be resolved, but the costs would likely outweigh - * the benefits. 
- * - * Many thanks to inimino [3] for his grammar [4] which helped me to solve some - * problems (such as automatic semicolon insertion) and also served to double - * check that I converted the original grammar correctly. - * - * [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm - * [2] https://github.com/estree/estree - * [3] http://inimino.org/~inimino/blog/ - * [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg - */ +// JavaScript Grammar +// ================== +// +// Based on grammar from ECMA-262, 5.1 Edition [1]. Generated parser builds a +// syntax tree compatible with the ESTree spec [2]. +// +// Limitations: +// +// * Non-BMP characters are completely ignored to avoid surrogate pair +// handling. +// +// * One can create identifiers containing illegal characters using Unicode +// escape sequences. For example, "abcd\u0020efgh" is not a valid +// identifier, but it is accepted by the parser. +// +// * Strict mode is not recognized. This means that within strict mode code, +// "implements", "interface", "let", "package", "private", "protected", +// "public", "static" and "yield" can be used as names. Many other +// restrictions and exceptions from Annex C are also not applied. +// +// All the limitations could be resolved, but the costs would likely outweigh +// the benefits. +// +// Many thanks to inimino [3] for his grammar [4] which helped me to solve some +// problems (such as automatic semicolon insertion) and also served to double +// check that I converted the original grammar correctly. 
+// +// [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm +// [2] https://github.com/estree/estree +// [3] http://inimino.org/~inimino/blog/ +// [4] http://boshi.inimino.org/3box/asof/1270029991384/PEG/ECMAScript_unified.peg { var TYPES_TO_PROPERTY_NAMES = { @@ -85,7 +83,7 @@ Start = __ program:Program __ { return program; } -/* ----- A.1 Lexical Grammar ----- */ +// ----- A.1 Lexical Grammar ----- SourceCharacter = . @@ -222,10 +220,8 @@ BooleanLiteral = TrueToken { return { type: "Literal", value: true }; } / FalseToken { return { type: "Literal", value: false }; } -/* - * The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official - * grammar, it comes from text in section 7.8.3. - */ +// The "!(IdentifierStart / DecimalDigit)" predicate is not part of the official +// grammar, it comes from text in section 7.8.3. NumericLiteral "number" = literal:HexIntegerLiteral !(IdentifierStart / DecimalDigit) { return literal; @@ -375,30 +371,28 @@ RegularExpressionClassChar RegularExpressionFlags = IdentifierPart* -/* - * Unicode Character Categories - * - * Extracted from the following Unicode Character Database file: - * - * http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt - * - * Unix magic used: - * - * grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters - * cut -f1 -d " " | # Extract code points - * grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters - * sed -e 's/\.\./-/' | # Adjust formatting - * sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting - * tr -d '\n' # Join lines - * - * ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one - * at the time of writing. - * - * Non-BMP characters are completely ignored to avoid surrogate pair handling - * (detecting surrogate pairs isn't possible with a simple character class and - * other methods would degrade performance). 
I don't consider it a big deal as - * even parsers in JavaScript engines of common browsers seem to ignore them. - */ +// Unicode Character Categories +// +// Extracted from the following Unicode Character Database file: +// +// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt +// +// Unix magic used: +// +// grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters +// cut -f1 -d " " | # Extract code points +// grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters +// sed -e 's/\.\./-/' | # Adjust formatting +// sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting +// tr -d '\n' # Join lines +// +// ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one +// at the time of writing. +// +// Non-BMP characters are completely ignored to avoid surrogate pair handling +// (detecting surrogate pairs isn't possible with a simple character class and +// other methods would degrade performance). I don't consider it a big deal as +// even parsers in JavaScript engines of common browsers seem to ignore them. 
// Letter, Lowercase Ll = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0561-\u0587\u13F8-\u13FD\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E2
7\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7
A7\uA7A9\uA7B5\uA7B7\uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A] @@ -433,7 +427,7 @@ Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F] // Separator, Space Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] -/* Tokens */ +// Tokens BreakToken = "break" !IdentifierPart CaseToken = "case" !IdentifierPart @@ -474,7 +468,7 @@ VoidToken = "void" !IdentifierPart WhileToken = "while" !IdentifierPart WithToken = "with" !IdentifierPart -/* Skipped */ +// Skipped __ = (WhiteSpace / LineTerminatorSequence / Comment)* @@ -482,7 +476,7 @@ __ _ = (WhiteSpace / MultiLineCommentNoLineTerminator)* -/* Automatic Semicolon Insertion */ +// Automatic Semicolon Insertion EOS = __ ";" @@ -493,11 +487,11 @@ EOS EOF = !. -/* ----- A.2 Number Conversions ----- */ +// ----- A.2 Number Conversions ----- -/* Irrelevant. */ +// Irrelevant. -/* ----- A.3 Expressions ----- */ +// ----- A.3 Expressions ----- PrimaryExpression = ThisToken { return { type: "ThisExpression" }; } @@ -962,7 +956,7 @@ ExpressionNoIn : head; } -/* ----- A.4 Statements ----- */ +// ----- A.4 Statements ----- Statement = Block @@ -1274,7 +1268,7 @@ Finally DebuggerStatement = DebuggerToken EOS { return { type: "DebuggerStatement" }; } -/* ----- A.5 Functions and Programs ----- */ +// ----- A.5 Functions and Programs ----- FunctionDeclaration = FunctionToken __ id:Identifier __ @@ -1332,14 +1326,14 @@ SourceElement = Statement / FunctionDeclaration -/* ----- A.6 Universal Resource Identifier Character Classes ----- */ +// ----- A.6 Universal Resource Identifier Character Classes ----- -/* Irrelevant. */ +// Irrelevant. -/* ----- A.7 Regular Expressions ----- */ +// ----- A.7 Regular Expressions ----- -/* Irrelevant. */ +// Irrelevant. -/* ----- A.8 JSON ----- */ +// ----- A.8 JSON ----- -/* Irrelevant. */ +// Irrelevant. 
diff --git a/examples/json.pegjs b/examples/json.pegjs index e0afbbf..aaf0951 100644 --- a/examples/json.pegjs +++ b/examples/json.pegjs @@ -1,21 +1,19 @@ -/* - * JSON Grammar - * ============ - * - * Based on the grammar from RFC 7159 [1]. - * - * Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the - * JSON website [4] (somewhat informally). The RFC seems the most authoritative - * source, which is confirmed e.g. by [5]. - * - * [1] http://tools.ietf.org/html/rfc7159 - * [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm - * [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm - * [4] http://json.org/ - * [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON - */ - -/* ----- 2. JSON Grammar ----- */ +// JSON Grammar +// ============ +// +// Based on the grammar from RFC 7159 [1]. +// +// Note that JSON is also specified in ECMA-262 [2], ECMA-404 [3], and on the +// JSON website [4] (somewhat informally). The RFC seems the most authoritative +// source, which is confirmed e.g. by [5]. +// +// [1] http://tools.ietf.org/html/rfc7159 +// [2] http://www.ecma-international.org/publications/standards/Ecma-262.htm +// [3] http://www.ecma-international.org/publications/standards/Ecma-404.htm +// [4] http://json.org/ +// [5] https://www.tbray.org/ongoing/When/201x/2014/03/05/RFC7159-JSON + +// ----- 2. JSON Grammar ----- JSON_text = ws value:value ws { return value; } @@ -29,7 +27,7 @@ value_separator = ws "," ws ws "whitespace" = [ \t\n\r]* -/* ----- 3. Values ----- */ +// ----- 3. Values ----- value = false @@ -44,7 +42,7 @@ false = "false" { return false; } null = "null" { return null; } true = "true" { return true; } -/* ----- 4. Objects ----- */ +// ----- 4. Objects ----- object = begin_object @@ -69,7 +67,7 @@ member return { name: name, value: value }; } -/* ----- 5. Arrays ----- */ +// ----- 5. Arrays ----- array = begin_array @@ -81,7 +79,7 @@ array end_array { return values !== null ? 
values : []; } -/* ----- 6. Numbers ----- */ +// ----- 6. Numbers ----- number "number" = minus? int frac? exp? { return parseFloat(text()); } @@ -96,7 +94,7 @@ minus = "-" plus = "+" zero = "0" -/* ----- 7. Strings ----- */ +// ----- 7. Strings ----- string "string" = quotation_mark chars:char* quotation_mark { return chars.join(""); } @@ -123,8 +121,8 @@ escape = "\\" quotation_mark = '"' unescaped = [^\0-\x1F\x22\x5C] -/* ----- Core ABNF Rules ----- */ +// ----- Core ABNF Rules ----- -/* See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4627). */ +// See RFC 4234, Appendix B (http://tools.ietf.org/html/rfc4234). DIGIT = [0-9] HEXDIG = [0-9a-f]i diff --git a/lib/compiler/asts.js b/lib/compiler/asts.js index f87702d..897912d 100644 --- a/lib/compiler/asts.js +++ b/lib/compiler/asts.js @@ -2,7 +2,7 @@ let visitor = require("./visitor"); -/* AST utilities. */ +// AST utilities. let asts = { findRule: function(ast, name) { for (let i = 0; i < ast.rules.length; i++) { diff --git a/lib/compiler/index.js b/lib/compiler/index.js index 72f4bbe..d7576f1 100644 --- a/lib/compiler/index.js +++ b/lib/compiler/index.js @@ -17,19 +17,15 @@ function processOptions(options, defaults) { } let compiler = { - /* - * AST node visitor builder. Useful mainly for plugins which manipulate the - * AST. - */ + // AST node visitor builder. Useful mainly for plugins which manipulate the + // AST. visitor: require("./visitor"), - /* - * Compiler passes. - * - * Each pass is a function that is passed the AST. It can perform checks on it - * or modify it as needed. If the pass encounters a semantic error, it throws - * |peg.GrammarError|. - */ + // Compiler passes. + // + // Each pass is a function that is passed the AST. It can perform checks on it + // or modify it as needed. If the pass encounters a semantic error, it throws + // |peg.GrammarError|. 
passes: { check: { reportUndefinedRules: require("./passes/report-undefined-rules"), @@ -47,12 +43,10 @@ let compiler = { } }, - /* - * Generates a parser from a specified grammar AST. Throws |peg.GrammarError| - * if the AST contains a semantic error. Note that not all errors are detected - * during the generation and some may protrude to the generated parser and - * cause its malfunction. - */ + // Generates a parser from a specified grammar AST. Throws |peg.GrammarError| + // if the AST contains a semantic error. Note that not all errors are detected + // during the generation and some may protrude to the generated parser and + // cause its malfunction. compile: function(ast, passes, options) { options = options !== undefined ? options : {}; diff --git a/lib/compiler/js.js b/lib/compiler/js.js index f0d9d84..4d9ec4e 100644 --- a/lib/compiler/js.js +++ b/lib/compiler/js.js @@ -2,17 +2,15 @@ function hex(ch) { return ch.charCodeAt(0).toString(16).toUpperCase(); } -/* JavaScript code generation helpers. */ +// JavaScript code generation helpers. let js = { stringEscape: function(s) { - /* - * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string - * literal except for the closing quote character, backslash, carriage - * return, line separator, paragraph separator, and line feed. Any character - * may appear in the form of an escape sequence. - * - * For portability, we also escape all control and non-ASCII characters. - */ + // ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string + // literal except for the closing quote character, backslash, carriage + // return, line separator, paragraph separator, and line feed. Any character + // may appear in the form of an escape sequence. + // + // For portability, we also escape all control and non-ASCII characters. 
return s .replace(/\\/g, '\\\\') // backslash .replace(/"/g, '\\"') // closing double quote @@ -30,11 +28,9 @@ let js = { }, regexpClassEscape: function(s) { - /* - * Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. - * - * For portability, we also escape all control and non-ASCII characters. - */ + // Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. + // + // For portability, we also escape all control and non-ASCII characters. return s .replace(/\\/g, '\\\\') // backslash .replace(/\//g, '\\/') // closing slash diff --git a/lib/compiler/opcodes.js b/lib/compiler/opcodes.js index da5ff4b..58e09b5 100644 --- a/lib/compiler/opcodes.js +++ b/lib/compiler/opcodes.js @@ -1,8 +1,8 @@ "use strict"; -/* Bytecode instruction opcodes. */ +// Bytecode instruction opcodes. let opcodes = { - /* Stack Manipulation */ + // Stack Manipulation PUSH: 0, // PUSH c PUSH_UNDEFINED: 1, // PUSH_UNDEFINED @@ -18,14 +18,14 @@ let opcodes = { WRAP: 11, // WRAP n TEXT: 12, // TEXT - /* Conditions and Loops */ + // Conditions and Loops IF: 13, // IF t, f IF_ERROR: 14, // IF_ERROR t, f IF_NOT_ERROR: 15, // IF_NOT_ERROR t, f WHILE_NOT_ERROR: 16, // WHILE_NOT_ERROR b - /* Matching */ + // Matching MATCH_ANY: 17, // MATCH_ANY a, f, ... MATCH_STRING: 18, // MATCH_STRING s, a, f, ... 
@@ -35,17 +35,17 @@ let opcodes = { ACCEPT_STRING: 22, // ACCEPT_STRING s FAIL: 23, // FAIL e - /* Calls */ + // Calls LOAD_SAVED_POS: 24, // LOAD_SAVED_POS p UPDATE_SAVED_POS: 25, // UPDATE_SAVED_POS CALL: 26, // CALL f, n, pc, p1, p2, ..., pN - /* Rules */ + // Rules RULE: 27, // RULE r - /* Failure Reporting */ + // Failure Reporting SILENT_FAILS_ON: 28, // SILENT_FAILS_ON SILENT_FAILS_OFF: 29 // SILENT_FAILS_OFF diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js index c4644e5..e84cfcc 100644 --- a/lib/compiler/passes/generate-bytecode.js +++ b/lib/compiler/passes/generate-bytecode.js @@ -5,189 +5,188 @@ let visitor = require("../visitor"); let op = require("../opcodes"); let js = require("../js"); -/* Generates bytecode. - * - * Instructions - * ============ - * - * Stack Manipulation - * ------------------ - * - * [0] PUSH c - * - * stack.push(consts[c]); - * - * [1] PUSH_UNDEFINED - * - * stack.push(undefined); - * - * [2] PUSH_NULL - * - * stack.push(null); - * - * [3] PUSH_FAILED - * - * stack.push(FAILED); - * - * [4] PUSH_EMPTY_ARRAY - * - * stack.push([]); - * - * [5] PUSH_CURR_POS - * - * stack.push(currPos); - * - * [6] POP - * - * stack.pop(); - * - * [7] POP_CURR_POS - * - * currPos = stack.pop(); - * - * [8] POP_N n - * - * stack.pop(n); - * - * [9] NIP - * - * value = stack.pop(); - * stack.pop(); - * stack.push(value); - * - * [10] APPEND - * - * value = stack.pop(); - * array = stack.pop(); - * array.push(value); - * stack.push(array); - * - * [11] WRAP n - * - * stack.push(stack.pop(n)); - * - * [12] TEXT - * - * stack.push(input.substring(stack.pop(), currPos)); - * - * Conditions and Loops - * -------------------- - * - * [13] IF t, f - * - * if (stack.top()) { - * interpret(ip + 3, ip + 3 + t); - * } else { - * interpret(ip + 3 + t, ip + 3 + t + f); - * } - * - * [14] IF_ERROR t, f - * - * if (stack.top() === FAILED) { - * interpret(ip + 3, ip + 3 + t); - * } else { - * interpret(ip + 3 + t, ip + 
3 + t + f); - * } - * - * [15] IF_NOT_ERROR t, f - * - * if (stack.top() !== FAILED) { - * interpret(ip + 3, ip + 3 + t); - * } else { - * interpret(ip + 3 + t, ip + 3 + t + f); - * } - * - * [16] WHILE_NOT_ERROR b - * - * while(stack.top() !== FAILED) { - * interpret(ip + 2, ip + 2 + b); - * } - * - * Matching - * -------- - * - * [17] MATCH_ANY a, f, ... - * - * if (input.length > currPos) { - * interpret(ip + 3, ip + 3 + a); - * } else { - * interpret(ip + 3 + a, ip + 3 + a + f); - * } - * - * [18] MATCH_STRING s, a, f, ... - * - * if (input.substr(currPos, consts[s].length) === consts[s]) { - * interpret(ip + 4, ip + 4 + a); - * } else { - * interpret(ip + 4 + a, ip + 4 + a + f); - * } - * - * [19] MATCH_STRING_IC s, a, f, ... - * - * if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) { - * interpret(ip + 4, ip + 4 + a); - * } else { - * interpret(ip + 4 + a, ip + 4 + a + f); - * } - * - * [20] MATCH_REGEXP r, a, f, ... - * - * if (consts[r].test(input.charAt(currPos))) { - * interpret(ip + 4, ip + 4 + a); - * } else { - * interpret(ip + 4 + a, ip + 4 + a + f); - * } - * - * [21] ACCEPT_N n - * - * stack.push(input.substring(currPos, n)); - * currPos += n; - * - * [22] ACCEPT_STRING s - * - * stack.push(consts[s]); - * currPos += consts[s].length; - * - * [23] FAIL e - * - * stack.push(FAILED); - * fail(consts[e]); - * - * Calls - * ----- - * - * [24] LOAD_SAVED_POS p - * - * savedPos = stack[p]; - * - * [25] UPDATE_SAVED_POS - * - * savedPos = currPos; - * - * [26] CALL f, n, pc, p1, p2, ..., pN - * - * value = consts[f](stack[p1], ..., stack[pN]); - * stack.pop(n); - * stack.push(value); - * - * Rules - * ----- - * - * [27] RULE r - * - * stack.push(parseRule(r)); - * - * Failure Reporting - * ----------------- - * - * [28] SILENT_FAILS_ON - * - * silentFails++; - * - * [29] SILENT_FAILS_OFF - * - * silentFails--; - */ +// Generates bytecode. 
+// +// Instructions +// ============ +// +// Stack Manipulation +// ------------------ +// +// [0] PUSH c +// +// stack.push(consts[c]); +// +// [1] PUSH_UNDEFINED +// +// stack.push(undefined); +// +// [2] PUSH_NULL +// +// stack.push(null); +// +// [3] PUSH_FAILED +// +// stack.push(FAILED); +// +// [4] PUSH_EMPTY_ARRAY +// +// stack.push([]); +// +// [5] PUSH_CURR_POS +// +// stack.push(currPos); +// +// [6] POP +// +// stack.pop(); +// +// [7] POP_CURR_POS +// +// currPos = stack.pop(); +// +// [8] POP_N n +// +// stack.pop(n); +// +// [9] NIP +// +// value = stack.pop(); +// stack.pop(); +// stack.push(value); +// +// [10] APPEND +// +// value = stack.pop(); +// array = stack.pop(); +// array.push(value); +// stack.push(array); +// +// [11] WRAP n +// +// stack.push(stack.pop(n)); +// +// [12] TEXT +// +// stack.push(input.substring(stack.pop(), currPos)); +// +// Conditions and Loops +// -------------------- +// +// [13] IF t, f +// +// if (stack.top()) { +// interpret(ip + 3, ip + 3 + t); +// } else { +// interpret(ip + 3 + t, ip + 3 + t + f); +// } +// +// [14] IF_ERROR t, f +// +// if (stack.top() === FAILED) { +// interpret(ip + 3, ip + 3 + t); +// } else { +// interpret(ip + 3 + t, ip + 3 + t + f); +// } +// +// [15] IF_NOT_ERROR t, f +// +// if (stack.top() !== FAILED) { +// interpret(ip + 3, ip + 3 + t); +// } else { +// interpret(ip + 3 + t, ip + 3 + t + f); +// } +// +// [16] WHILE_NOT_ERROR b +// +// while(stack.top() !== FAILED) { +// interpret(ip + 2, ip + 2 + b); +// } +// +// Matching +// -------- +// +// [17] MATCH_ANY a, f, ... +// +// if (input.length > currPos) { +// interpret(ip + 3, ip + 3 + a); +// } else { +// interpret(ip + 3 + a, ip + 3 + a + f); +// } +// +// [18] MATCH_STRING s, a, f, ... +// +// if (input.substr(currPos, consts[s].length) === consts[s]) { +// interpret(ip + 4, ip + 4 + a); +// } else { +// interpret(ip + 4 + a, ip + 4 + a + f); +// } +// +// [19] MATCH_STRING_IC s, a, f, ... 
+// +// if (input.substr(currPos, consts[s].length).toLowerCase() === consts[s]) { +// interpret(ip + 4, ip + 4 + a); +// } else { +// interpret(ip + 4 + a, ip + 4 + a + f); +// } +// +// [20] MATCH_REGEXP r, a, f, ... +// +// if (consts[r].test(input.charAt(currPos))) { +// interpret(ip + 4, ip + 4 + a); +// } else { +// interpret(ip + 4 + a, ip + 4 + a + f); +// } +// +// [21] ACCEPT_N n +// +// stack.push(input.substring(currPos, n)); +// currPos += n; +// +// [22] ACCEPT_STRING s +// +// stack.push(consts[s]); +// currPos += consts[s].length; +// +// [23] FAIL e +// +// stack.push(FAILED); +// fail(consts[e]); +// +// Calls +// ----- +// +// [24] LOAD_SAVED_POS p +// +// savedPos = stack[p]; +// +// [25] UPDATE_SAVED_POS +// +// savedPos = currPos; +// +// [26] CALL f, n, pc, p1, p2, ..., pN +// +// value = consts[f](stack[p1], ..., stack[pN]); +// stack.pop(n); +// stack.push(value); +// +// Rules +// ----- +// +// [27] RULE r +// +// stack.push(parseRule(r)); +// +// Failure Reporting +// ----------------- +// +// [28] SILENT_FAILS_ON +// +// silentFails++; +// +// [29] SILENT_FAILS_OFF +// +// silentFails--; function generateBytecode(ast) { let consts = []; @@ -308,12 +307,10 @@ function generateBytecode(ast) { 'peg$otherExpectation("' + js.stringEscape(node.name) + '")' ); - /* - * The code generated below is slightly suboptimal because |FAIL| pushes - * to the stack, so we need to stick a |POP| in front of it. We lack a - * dedicated instruction that would just report the failure and not touch - * the stack. - */ + // The code generated below is slightly suboptimal because |FAIL| pushes + // to the stack, so we need to stick a |POP| in front of it. We lack a + // dedicated instruction that would just report the failure and not touch + // the stack. 
return buildSequence( [op.SILENT_FAILS_ON], generate(node.expression, context), @@ -551,11 +548,9 @@ function generateBytecode(ast) { + ')' ); - /* - * For case-sensitive strings the value must match the beginning of the - * remaining input exactly. As a result, we can use |ACCEPT_STRING| and - * save one |substr| call that would be needed if we used |ACCEPT_N|. - */ + // For case-sensitive strings the value must match the beginning of the + // remaining input exactly. As a result, we can use |ACCEPT_STRING| and + // save one |substr| call that would be needed if we used |ACCEPT_N|. return buildCondition( node.ignoreCase ? [op.MATCH_STRING_IC, stringIndex] diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index e5429ee..b6d0a52 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -4,9 +4,9 @@ let asts = require("../asts"); let op = require("../opcodes"); let js = require("../js"); -/* Generates parser JavaScript code. */ +// Generates parser JavaScript code. function generateJS(ast, options) { - /* These only indent non-empty lines to avoid trailing whitespace. */ + // These only indent non-empty lines to avoid trailing whitespace. function indent2(code) { return code.replace(/^(.+)$/gm, ' $1'); } function indent10(code) { return code.replace(/^(.+)$/gm, ' $1'); } @@ -222,13 +222,11 @@ function generateJS(ast, options) { parts.push(indent2(generateRuleHeader('peg$ruleNames[index]', 'index'))); parts.push([ - /* - * The point of the outer loop and the |ips| & |ends| stacks is to avoid - * recursive calls for interpreting parts of bytecode. In other words, we - * implement the |interpret| operation of the abstract machine without - * function calls. Such calls would likely slow the parser down and more - * importantly cause stack overflows for complex grammars. 
- */ + // The point of the outer loop and the |ips| & |ends| stacks is to avoid + // recursive calls for interpreting parts of bytecode. In other words, we + // implement the |interpret| operation of the abstract machine without + // function calls. Such calls would likely slow the parser down and more + // importantly cause stack overflows for complex grammars. ' while (true) {', ' while (ip < end) {', ' switch (bc[ip]) {', @@ -1201,11 +1199,9 @@ function generateJS(ast, options) { function generateWrapper(toplevelCode) { function generateGeneratedByComment() { return [ - '/*', - ' * Generated by PEG.js 0.10.0.', - ' *', - ' * http://pegjs.org/', - ' */' + '// Generated by PEG.js 0.10.0.', + '//', + '// http://pegjs.org/' ].join('\n'); } diff --git a/lib/compiler/passes/remove-proxy-rules.js b/lib/compiler/passes/remove-proxy-rules.js index dc8d2f4..c208fe3 100644 --- a/lib/compiler/passes/remove-proxy-rules.js +++ b/lib/compiler/passes/remove-proxy-rules.js @@ -2,9 +2,7 @@ let visitor = require("../visitor"); -/* - * Removes proxy rules -- that is, rules that only delegate to other rule. - */ +// Removes proxy rules -- that is, rules that only delegate to other rule. function removeProxyRules(ast, options) { function isProxyRule(node) { return node.type === "rule" && node.expression.type === "rule_ref"; diff --git a/lib/compiler/passes/report-duplicate-labels.js b/lib/compiler/passes/report-duplicate-labels.js index d846965..00e72ab 100644 --- a/lib/compiler/passes/report-duplicate-labels.js +++ b/lib/compiler/passes/report-duplicate-labels.js @@ -3,7 +3,7 @@ let GrammarError = require("../../grammar-error"); let visitor = require("../visitor"); -/* Checks that each label is defined only once within each scope. */ +// Checks that each label is defined only once within each scope. 
function reportDuplicateLabels(ast) { function cloneEnv(env) { let clone = {}; diff --git a/lib/compiler/passes/report-duplicate-rules.js b/lib/compiler/passes/report-duplicate-rules.js index 114b7b4..a4e764a 100644 --- a/lib/compiler/passes/report-duplicate-rules.js +++ b/lib/compiler/passes/report-duplicate-rules.js @@ -3,7 +3,7 @@ let GrammarError = require("../../grammar-error"); let visitor = require("../visitor"); -/* Checks that each rule is defined only once. */ +// Checks that each rule is defined only once. function reportDuplicateRules(ast) { let rules = {}; diff --git a/lib/compiler/passes/report-infinite-recursion.js b/lib/compiler/passes/report-infinite-recursion.js index 9f8053d..ac65cdb 100644 --- a/lib/compiler/passes/report-infinite-recursion.js +++ b/lib/compiler/passes/report-infinite-recursion.js @@ -4,18 +4,16 @@ let GrammarError = require("../../grammar-error"); let asts = require("../asts"); let visitor = require("../visitor"); -/* - * Reports left recursion in the grammar, which prevents infinite recursion in - * the generated parser. - * - * Both direct and indirect recursion is detected. The pass also correctly - * reports cases like this: - * - * start = "a"? start - * - * In general, if a rule reference can be reached without consuming any input, - * it can lead to left recursion. - */ +// Reports left recursion in the grammar, which prevents infinite recursion in +// the generated parser. +// +// Both direct and indirect recursion is detected. The pass also correctly +// reports cases like this: +// +// start = "a"? start +// +// In general, if a rule reference can be reached without consuming any input, +// it can lead to left recursion. 
function reportInfiniteRecursion(ast) { let visitedRules = []; diff --git a/lib/compiler/passes/report-infinite-repetition.js b/lib/compiler/passes/report-infinite-repetition.js index f0f4a1c..f6766ea 100644 --- a/lib/compiler/passes/report-infinite-repetition.js +++ b/lib/compiler/passes/report-infinite-repetition.js @@ -4,10 +4,8 @@ let GrammarError = require("../../grammar-error"); let asts = require("../asts"); let visitor = require("../visitor"); -/* - * Reports expressions that don't consume any input inside |*| or |+| in the - * grammar, which prevents infinite loops in the generated parser. - */ +// Reports expressions that don't consume any input inside |*| or |+| in the +// grammar, which prevents infinite loops in the generated parser. function reportInfiniteRepetition(ast) { let check = visitor.build({ zero_or_more: function(node) { diff --git a/lib/compiler/passes/report-undefined-rules.js b/lib/compiler/passes/report-undefined-rules.js index 96a8a81..a2430f3 100644 --- a/lib/compiler/passes/report-undefined-rules.js +++ b/lib/compiler/passes/report-undefined-rules.js @@ -4,7 +4,7 @@ let GrammarError = require("../../grammar-error"); let asts = require("../asts"); let visitor = require("../visitor"); -/* Checks that all referenced rules exist. */ +// Checks that all referenced rules exist. function reportUndefinedRules(ast) { let check = visitor.build({ rule_ref: function(node) { diff --git a/lib/compiler/visitor.js b/lib/compiler/visitor.js index d3c0859..82715e5 100644 --- a/lib/compiler/visitor.js +++ b/lib/compiler/visitor.js @@ -1,6 +1,6 @@ "use strict"; -/* Simple AST node visitor builder. */ +// Simple AST node visitor builder. let visitor = { build: function(functions) { function visit(node) { diff --git a/lib/grammar-error.js b/lib/grammar-error.js index d4905e5..ce3fb98 100644 --- a/lib/grammar-error.js +++ b/lib/grammar-error.js @@ -1,6 +1,6 @@ "use strict"; -/* Thrown when the grammar contains an error. 
*/ +// Thrown when the grammar contains an error. class GrammarError { constructor(message, location) { this.name = "GrammarError"; diff --git a/lib/parser.js b/lib/parser.js index 04e63f1..1990dbe 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -1,11 +1,9 @@ /* eslint-env node, amd */ /* eslint no-unused-vars: 0 */ -/* - * Generated by PEG.js 0.10.0. - * - * http://pegjs.org/ - */ +// Generated by PEG.js 0.10.0. +// +// http://pegjs.org/ "use strict"; @@ -236,12 +234,10 @@ function peg$parse(input, options) { var peg$c29 = ")"; var peg$c30 = peg$literalExpectation(")", false); var peg$c31 = function(expression) { - /* - * The purpose of the "group" AST node is just to isolate label scope. We - * don't need to put it around nodes that can't contain any labels or - * nodes that already isolate label scope themselves. This leaves us with - * "labeled" and "sequence". - */ + // The purpose of the "group" AST node is just to isolate label scope. We + // don't need to put it around nodes that can't contain any labels or + // nodes that already isolate label scope themselves. This leaves us with + // "labeled" and "sequence". return expression.type === 'labeled' || expression.type === 'sequence' ? { type: "group", expression: expression } : expression; diff --git a/lib/peg.js b/lib/peg.js index 1ad812b..0c2f43a 100644 --- a/lib/peg.js +++ b/lib/peg.js @@ -1,24 +1,22 @@ "use strict"; let peg = { - /* PEG.js version (uses semantic versioning). */ + // PEG.js version (uses semantic versioning). VERSION: "0.10.0", GrammarError: require("./grammar-error"), parser: require("./parser"), compiler: require("./compiler"), - /* - * Generates a parser from a specified grammar and returns it. - * - * The grammar must be a string in the format described by the metagramar in - * the parser.pegjs file. - * - * Throws |peg.parser.SyntaxError| if the grammar contains a syntax error or - * |peg.GrammarError| if it contains a semantic error. 
Note that not all - * errors are detected during the generation and some may protrude to the - * generated parser and cause its malfunction. - */ + // Generates a parser from a specified grammar and returns it. + // + // The grammar must be a string in the format described by the metagramar in + // the parser.pegjs file. + // + // Throws |peg.parser.SyntaxError| if the grammar contains a syntax error or + // |peg.GrammarError| if it contains a semantic error. Note that not all + // errors are detected during the generation and some may protrude to the + // generated parser and cause its malfunction. generate: function(grammar, options) { options = options !== undefined ? options : {}; diff --git a/spec/api/pegjs-api.spec.js b/spec/api/pegjs-api.spec.js index 9ef162f..6c25c5c 100644 --- a/spec/api/pegjs-api.spec.js +++ b/spec/api/pegjs-api.spec.js @@ -26,10 +26,8 @@ describe("PEG.js API", function() { 'c = "x"' ].join("\n"); - /* - * The |allowedStartRules| option is implemented separately for each - * optimization mode, so we need to test it in both. - */ + // The |allowedStartRules| option is implemented separately for each + // optimization mode, so we need to test it in both. describe("when optimizing for parsing speed", function() { describe("when |allowedStartRules| is not set", function() { @@ -151,10 +149,8 @@ describe("PEG.js API", function() { }); }); - /* - * The |optimize| option isn't tested because there is no meaningful way to - * write the specs without turning this into a performance test. - */ + // The |optimize| option isn't tested because there is no meaningful way to + // write the specs without turning this into a performance test. describe("output", function() { let grammar = 'start = "a"'; @@ -187,13 +183,11 @@ describe("PEG.js API", function() { }); }); - /* - * The |format|, |exportVars|, and |dependencies| options are not tested - * becasue there is no meaningful way to thest their effects without turning - * this into an integration test. 
- */ + // The |format|, |exportVars|, and |dependencies| options are not tested + // becasue there is no meaningful way to thest their effects without turning + // this into an integration test. - /* The |plugins| option is tested in plugin API specs. */ + // The |plugins| option is tested in plugin API specs. it("accepts custom options", function() { peg.generate('start = "a"', { foo: 42 }); diff --git a/spec/behavior/generated-parser-behavior.spec.js b/spec/behavior/generated-parser-behavior.spec.js index fdae790..b7b3c3b 100644 --- a/spec/behavior/generated-parser-behavior.spec.js +++ b/spec/behavior/generated-parser-behavior.spec.js @@ -120,10 +120,8 @@ describe("generated parser behavior", function() { } }); - /* - * Stub out |console.log| so that the default tracer doesn't clutter - * test output. - */ + // Stub out |console.log| so that the default tracer doesn't clutter + // test output. if (typeof console === "object") { spyOn(console, "log"); } @@ -412,11 +410,9 @@ describe("generated parser behavior", function() { describe("positive semantic predicate", function() { describe("when the code returns a truthy value", function() { it("returns |undefined|", function() { - /* - * The |""| is needed so that the parser doesn't return just - * |undefined| which we can't compare against in |toParse| due to the - * way optional parameters work. - */ + // The |""| is needed so that the parser doesn't return just + // |undefined| which we can't compare against in |toParse| due to the + // way optional parameters work. 
let parser = peg.generate('start = &{ return true; } ""', options); expect(parser).toParse("", [undefined, ""]); @@ -592,7 +588,7 @@ describe("generated parser behavior", function() { end: { offset: 13, line: 7, column: 5 } }); - /* Newline representations */ + // Newline representations expect(parser).toParse("1\nx", { // Unix start: { offset: 2, line: 2, column: 1 }, end: { offset: 2, line: 2, column: 1 } @@ -608,11 +604,9 @@ describe("generated parser behavior", function() { describe("negative semantic predicate", function() { describe("when the code returns a falsey value", function() { it("returns |undefined|", function() { - /* - * The |""| is needed so that the parser doesn't return just - * |undefined| which we can't compare against in |toParse| due to the - * way optional parameters work. - */ + // The |""| is needed so that the parser doesn't return just + // |undefined| which we can't compare against in |toParse| due to the + // way optional parameters work. let parser = peg.generate('start = !{ return false; } ""', options); expect(parser).toParse("", [undefined, ""]); @@ -788,7 +782,7 @@ describe("generated parser behavior", function() { end: { offset: 13, line: 7, column: 5 } }); - /* Newline representations */ + // Newline representations expect(parser).toParse("1\nx", { // Unix start: { offset: 2, line: 2, column: 1 }, end: { offset: 2, line: 2, column: 1 } @@ -1166,7 +1160,7 @@ describe("generated parser behavior", function() { end: { offset: 14, line: 7, column: 6 } }); - /* Newline representations */ + // Newline representations expect(parser).toParse("1\nx", { // Unix start: { offset: 2, line: 2, column: 1 }, end: { offset: 3, line: 2, column: 2 } @@ -1460,7 +1454,7 @@ describe("generated parser behavior", function() { } }); - /* Newline representations */ + // Newline representations expect(parser).toFailToParse("1\nx", { // Old Mac location: { start: { offset: 2, line: 2, column: 1 }, @@ -1477,18 +1471,14 @@ describe("generated parser 
behavior", function() { }); }); - /* - * Following examples are from Wikipedia, see - * http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938. - */ + // Following examples are from Wikipedia, see + // http://en.wikipedia.org/w/index.php?title=Parsing_expression_grammar&oldid=335106938. describe("complex examples", function() { it("handles arithmetics example correctly", function() { - /* - * Value ← [0-9]+ / '(' Expr ')' - * Product ← Value (('*' / '/') Value)* - * Sum ← Product (('+' / '-') Product)* - * Expr ← Sum - */ + // Value ← [0-9]+ / '(' Expr ')' + // Product ← Value (('*' / '/') Value)* + // Sum ← Product (('+' / '-') Product)* + // Expr ← Sum let parser = peg.generate([ 'Expr = Sum', 'Sum = head:Product tail:(("+" / "-") Product)* {', @@ -1507,40 +1497,39 @@ describe("generated parser behavior", function() { ' / "(" expr:Expr ")" { return expr; }' ].join("\n"), options); - /* The "value" rule */ + // The "value" rule expect(parser).toParse("0", 0); expect(parser).toParse("123", 123); expect(parser).toParse("(42+43)", 42+43); - /* The "product" rule */ + // The "product" rule expect(parser).toParse("42", 42); expect(parser).toParse("42*43", 42*43); expect(parser).toParse("42*43*44*45", 42*43*44*45); expect(parser).toParse("42/43", 42/43); expect(parser).toParse("42/43/44/45", 42/43/44/45); - /* The "sum" rule */ + // The "sum" rule expect(parser).toParse("42*43", 42*43); expect(parser).toParse("42*43+44*45", 42*43+44*45); expect(parser).toParse("42*43+44*45+46*47+48*49", 42*43+44*45+46*47+48*49); expect(parser).toParse("42*43-44*45", 42*43-44*45); expect(parser).toParse("42*43-44*45-46*47-48*49", 42*43-44*45-46*47-48*49); - /* The "expr" rule */ + // The "expr" rule expect(parser).toParse("42+43", 42+43); - /* Complex test */ + // Complex test expect(parser).toParse("(1+2)*(3+4)", (1+2)*(3+4)); }); it("handles non-context-free language correctly", function() { - /* The following parsing expression grammar describes the 
classic - * non-context-free language { a^n b^n c^n : n >= 1 }: - * - * S ← &(A c) a+ B !(a/b/c) - * A ← a A? b - * B ← b B? c - */ + // The following parsing expression grammar describes the classic + // non-context-free language { a^n b^n c^n : n >= 1 }: + // + // S ← &(A c) a+ B !(a/b/c) + // A ← a A? b + // B ← b B? c let parser = peg.generate([ 'S = &(A "c") a:"a"+ B:B !("a" / "b" / "c") { return a.join("") + B; }', 'A = a:"a" A:A? b:"b" { return [a, A, b].join(""); }', @@ -1558,13 +1547,11 @@ describe("generated parser behavior", function() { }); it("handles nested comments example correctly", function() { - /* - * Begin ← "(*" - * End ← "*)" - * C ← Begin N* End - * N ← C / (!Begin !End Z) - * Z ← any single character - */ + // Begin ← "(*" + // End ← "*)" + // C ← Begin N* End + // N ← C / (!Begin !End Z) + // Z ← any single character let parser = peg.generate([ 'C = begin:Begin ns:N* end:End { return begin + ns.join("") + end; }', 'N = C', diff --git a/spec/server b/spec/server index e1a0a36..686b393 100755 --- a/spec/server +++ b/spec/server @@ -5,11 +5,9 @@ "use strict"; -/* - * Small server whose main purpose is to ensure that both the specced code and - * the specs get passed through Babel & Browserify before they are served to the - * browser. - */ +// Small server whose main purpose is to ensure that both the specced code and +// the specs get passed through Babel & Browserify before they are served to the +// browser. let express = require("express"); let logger = require("morgan"); diff --git a/spec/unit/compiler/passes/report-infinite-recursion.spec.js b/spec/unit/compiler/passes/report-infinite-recursion.spec.js index 6f3fb4d..0fda874 100644 --- a/spec/unit/compiler/passes/report-infinite-recursion.spec.js +++ b/spec/unit/compiler/passes/report-infinite-recursion.spec.js @@ -39,7 +39,7 @@ describe("compiler pass |reportInfiniteRecursion|", function() { expect(pass).not.toReportError('start = "" "" "a" start'); }); - /* Regression test for #359. 
*/ + // Regression test for #359. it("reports left recursion when rule reference is wrapped in an expression", function() { expect(pass).toReportError('start = "" start?'); }); diff --git a/spec/unit/parser.spec.js b/spec/unit/parser.spec.js index cf86b48..c7d1ac4 100644 --- a/spec/unit/parser.spec.js +++ b/spec/unit/parser.spec.js @@ -236,7 +236,7 @@ describe("PEG.js grammar parser", function() { }); }); - /* Canonical Grammar is "a = \"abcd\"; b = \"efgh\"; c = \"ijkl\";". */ + // Canonical Grammar is "a = \"abcd\"; b = \"efgh\"; c = \"ijkl\";". it("parses Grammar", function() { expect('\na = "abcd";\n').toParseAs( { type: "grammar", initializer: null, rules: [ruleA] } @@ -249,14 +249,14 @@ describe("PEG.js grammar parser", function() { ); }); - /* Canonical Initializer is "{ code }". */ + // Canonical Initializer is "{ code }". it("parses Initializer", function() { expect('{ code };start = "abcd"').toParseAs( { type: "grammar", initializer: initializer, rules: [ruleStart] } ); }); - /* Canonical Rule is "a = \"abcd\";". */ + // Canonical Rule is "a = \"abcd\";". it("parses Rule", function() { expect('start\n=\n"abcd";').toParseAs( oneRuleGrammar(literalAbcd) @@ -266,14 +266,14 @@ describe("PEG.js grammar parser", function() { ); }); - /* Canonical Expression is "\"abcd\"". */ + // Canonical Expression is "\"abcd\"". it("parses Expression", function() { expect('start = "abcd" / "efgh" / "ijkl"').toParseAs( oneRuleGrammar(choice) ); }); - /* Canonical ChoiceExpression is "\"abcd\" / \"efgh\" / \"ijkl\"". */ + // Canonical ChoiceExpression is "\"abcd\" / \"efgh\" / \"ijkl\"". it("parses ChoiceExpression", function() { expect('start = "abcd" { code }').toParseAs( oneRuleGrammar(actionAbcd) @@ -288,7 +288,7 @@ describe("PEG.js grammar parser", function() { ); }); - /* Canonical ActionExpression is "\"abcd\" { code }". */ + // Canonical ActionExpression is "\"abcd\" { code }". 
it("parses ActionExpression", function() { expect('start = "abcd" "efgh" "ijkl"').toParseAs( oneRuleGrammar(sequence) @@ -298,7 +298,7 @@ describe("PEG.js grammar parser", function() { ); }); - /* Canonical SequenceExpression is "\"abcd\" \"efgh\" \"ijkl\"". */ + // Canonical SequenceExpression is "\"abcd\" \"efgh\" \"ijkl\"". it("parses SequenceExpression", function() { expect('start = a:"abcd"').toParseAs( oneRuleGrammar(labeledAbcd) @@ -311,39 +311,39 @@ describe("PEG.js grammar parser", function() { ); }); - /* Canonical LabeledExpression is "a:\"abcd\"". */ + // Canonical LabeledExpression is "a:\"abcd\"". it("parses LabeledExpression", function() { expect('start = a\n:\n!"abcd"').toParseAs(oneRuleGrammar(labeledSimpleNot)); expect('start = !"abcd"' ).toParseAs(oneRuleGrammar(simpleNotAbcd)); }); - /* Canonical PrefixedExpression is "!\"abcd\"". */ + // Canonical PrefixedExpression is "!\"abcd\"". it("parses PrefixedExpression", function() { expect('start = !\n"abcd"?' ).toParseAs(oneRuleGrammar(simpleNotOptional)); expect('start = "abcd"?' ).toParseAs(oneRuleGrammar(optional)); }); - /* Canonical PrefixedOperator is "!". */ + // Canonical PrefixedOperator is "!". it("parses PrefixedOperator", function() { expect('start = $"abcd"?').toParseAs(oneRuleGrammar(textOptional)); expect('start = &"abcd"?').toParseAs(oneRuleGrammar(simpleAndOptional)); expect('start = !"abcd"?').toParseAs(oneRuleGrammar(simpleNotOptional)); }); - /* Canonical SuffixedExpression is "\"ebcd\"?". */ + // Canonical SuffixedExpression is "\"ebcd\"?". it("parses SuffixedExpression", function() { expect('start = "abcd"\n?').toParseAs(oneRuleGrammar(optional)); expect('start = "abcd"' ).toParseAs(oneRuleGrammar(literalAbcd)); }); - /* Canonical SuffixedOperator is "?". */ + // Canonical SuffixedOperator is "?". 
it("parses SuffixedOperator", function() { expect('start = "abcd"?').toParseAs(oneRuleGrammar(optional)); expect('start = "abcd"*').toParseAs(oneRuleGrammar(zeroOrMore)); expect('start = "abcd"+').toParseAs(oneRuleGrammar(oneOrMore)); }); - /* Canonical PrimaryExpression is "\"abcd\"". */ + // Canonical PrimaryExpression is "\"abcd\"". it("parses PrimaryExpression", function() { expect('start = "abcd"' ).toParseAs(trivialGrammar); expect('start = [a-d]' ).toParseAs(classGrammar([["a", "d"]], false, false)); @@ -356,7 +356,7 @@ describe("PEG.js grammar parser", function() { expect('start = (\n"abcd"\n)' ).toParseAs(trivialGrammar); }); - /* Canonical RuleReferenceExpression is "a". */ + // Canonical RuleReferenceExpression is "a". it("parses RuleReferenceExpression", function() { expect('start = a').toParseAs(ruleRefGrammar("a")); @@ -364,20 +364,20 @@ describe("PEG.js grammar parser", function() { expect('start = a\n"abcd"\n=').toFailToParse(); }); - /* Canonical SemanticPredicateExpression is "!{ code }". */ + // Canonical SemanticPredicateExpression is "!{ code }". it("parses SemanticPredicateExpression", function() { expect('start = !\n{ code }').toParseAs(oneRuleGrammar(semanticNot)); }); - /* Canonical SemanticPredicateOperator is "!". */ + // Canonical SemanticPredicateOperator is "!". it("parses SemanticPredicateOperator", function() { expect('start = &{ code }').toParseAs(oneRuleGrammar(semanticAnd)); expect('start = !{ code }').toParseAs(oneRuleGrammar(semanticNot)); }); - /* The SourceCharacter rule is not tested. */ + // The SourceCharacter rule is not tested. - /* Canonical WhiteSpace is " ". */ + // Canonical WhiteSpace is " ". it("parses WhiteSpace", function() { expect('start =\t"abcd"' ).toParseAs(trivialGrammar); expect('start =\v"abcd"' ).toParseAs(trivialGrammar); @@ -388,7 +388,7 @@ describe("PEG.js grammar parser", function() { expect('start =\u1680"abcd"').toParseAs(trivialGrammar); }); - /* Canonical LineTerminator is "\n". 
*/ + // Canonical LineTerminator is "\n". it("parses LineTerminator", function() { expect('start = "\n"' ).toFailToParse(); expect('start = "\r"' ).toFailToParse(); @@ -396,7 +396,7 @@ describe("PEG.js grammar parser", function() { expect('start = "\u2029"').toFailToParse(); }); - /* Canonical LineTerminatorSequence is "\r\n". */ + // Canonical LineTerminatorSequence is "\r\n". it("parses LineTerminatorSequence", function() { expect('start =\n"abcd"' ).toParseAs(trivialGrammar); expect('start =\r\n"abcd"' ).toParseAs(trivialGrammar); @@ -430,7 +430,7 @@ describe("PEG.js grammar parser", function() { expect('a = "abcd"/*\n*/\r\nb = "efgh"').toFailToParse(); }); - /* Canonical SingleLineComment is "// comment". */ + // Canonical SingleLineComment is "// comment". it("parses SingleLineComment", function() { expect('start =//\n"abcd"' ).toParseAs(trivialGrammar); expect('start =//a\n"abcd"' ).toParseAs(trivialGrammar); @@ -439,19 +439,19 @@ describe("PEG.js grammar parser", function() { expect('start =//\n@\n"abcd"').toFailToParse(); }); - /* Canonical Identifier is "a". */ + // Canonical Identifier is "a". it("parses Identifier", function() { expect('start = a:"abcd"').toParseAs(oneRuleGrammar(labeledAbcd)); }); - /* Canonical IdentifierName is "a". */ + // Canonical IdentifierName is "a". it("parses IdentifierName", function() { expect('start = a' ).toParseAs(ruleRefGrammar("a")); expect('start = ab' ).toParseAs(ruleRefGrammar("ab")); expect('start = abcd').toParseAs(ruleRefGrammar("abcd")); }); - /* Canonical IdentifierStart is "a". */ + // Canonical IdentifierStart is "a". it("parses IdentifierStart", function() { expect('start = a' ).toParseAs(ruleRefGrammar("a")); expect('start = $' ).toParseAs(ruleRefGrammar("$")); @@ -459,7 +459,7 @@ describe("PEG.js grammar parser", function() { expect('start = \\u0061').toParseAs(ruleRefGrammar("a")); }); - /* Canonical IdentifierPart is "a". */ + // Canonical IdentifierPart is "a". 
it("parses IdentifierPart", function() { expect('start = aa' ).toParseAs(ruleRefGrammar("aa")); expect('start = a\u0300').toParseAs(ruleRefGrammar("a\u0300")); @@ -469,15 +469,15 @@ describe("PEG.js grammar parser", function() { expect('start = a\u200D').toParseAs(ruleRefGrammar("a\u200D")); }); - /* Unicode rules and reserved word rules are not tested. */ + // Unicode rules and reserved word rules are not tested. - /* Canonical LiteralMatcher is "\"abcd\"". */ + // Canonical LiteralMatcher is "\"abcd\"". it("parses LiteralMatcher", function() { expect('start = "abcd"' ).toParseAs(literalGrammar("abcd", false)); expect('start = "abcd"i').toParseAs(literalGrammar("abcd", true)); }); - /* Canonical StringLiteral is "\"abcd\"". */ + // Canonical StringLiteral is "\"abcd\"". it("parses StringLiteral", function() { expect('start = ""' ).toParseAs(literalGrammar("", false)); expect('start = "a"' ).toParseAs(literalGrammar("a", false)); @@ -488,7 +488,7 @@ describe("PEG.js grammar parser", function() { expect("start = 'abc'").toParseAs(literalGrammar("abc", false)); }); - /* Canonical DoubleStringCharacter is "a". */ + // Canonical DoubleStringCharacter is "a". it("parses DoubleStringCharacter", function() { expect('start = "a"' ).toParseAs(literalGrammar("a", false)); expect('start = "\\n"' ).toParseAs(literalGrammar("\n", false)); @@ -499,7 +499,7 @@ describe("PEG.js grammar parser", function() { expect('start = "\n"').toFailToParse(); }); - /* Canonical SingleStringCharacter is "a". */ + // Canonical SingleStringCharacter is "a". it("parses SingleStringCharacter", function() { expect("start = 'a'" ).toParseAs(literalGrammar("a", false)); expect("start = '\\n'" ).toParseAs(literalGrammar("\n", false)); @@ -510,7 +510,7 @@ describe("PEG.js grammar parser", function() { expect("start = '\n'").toFailToParse(); }); - /* Canonical CharacterClassMatcher is "[a-d]". */ + // Canonical CharacterClassMatcher is "[a-d]". 
it("parses CharacterClassMatcher", function() { expect('start = []').toParseAs( classGrammar([], false, false) @@ -540,7 +540,7 @@ describe("PEG.js grammar parser", function() { ); }); - /* Canonical ClassCharacterRange is "a-d". */ + // Canonical ClassCharacterRange is "a-d". it("parses ClassCharacterRange", function() { expect('start = [a-d]').toParseAs(classGrammar([["a", "d"]], false, false)); @@ -550,7 +550,7 @@ describe("PEG.js grammar parser", function() { }); }); - /* Canonical ClassCharacter is "a". */ + // Canonical ClassCharacter is "a". it("parses ClassCharacter", function() { expect('start = [a]' ).toParseAs(classGrammar(["a"], false, false)); expect('start = [\\n]' ).toParseAs(classGrammar(["\n"], false, false)); @@ -561,12 +561,12 @@ describe("PEG.js grammar parser", function() { expect('start = [\n]').toFailToParse(); }); - /* Canonical LineContinuation is "\\\n". */ + // Canonical LineContinuation is "\\\n". it("parses LineContinuation", function() { expect('start = "\\\r\n"').toParseAs(literalGrammar("", false)); }); - /* Canonical EscapeSequence is "n". */ + // Canonical EscapeSequence is "n". it("parses EscapeSequence", function() { expect('start = "\\n"' ).toParseAs(literalGrammar("\n", false)); expect('start = "\\0"' ).toParseAs(literalGrammar("\x00", false)); @@ -576,13 +576,13 @@ describe("PEG.js grammar parser", function() { expect('start = "\\09"').toFailToParse(); }); - /* Canonical CharacterEscapeSequence is "n". */ + // Canonical CharacterEscapeSequence is "n". it("parses CharacterEscapeSequence", function() { expect('start = "\\n"').toParseAs(literalGrammar("\n", false)); expect('start = "\\a"').toParseAs(literalGrammar("a", false)); }); - /* Canonical SingleEscapeCharacter is "n". */ + // Canonical SingleEscapeCharacter is "n". 
it("parses SingleEscapeCharacter", function() { expect('start = "\\\'"').toParseAs(literalGrammar("'", false)); expect('start = "\\""' ).toParseAs(literalGrammar('"', false)); @@ -595,44 +595,40 @@ describe("PEG.js grammar parser", function() { expect('start = "\\v"' ).toParseAs(literalGrammar("\v", false)); }); - /* Canonical NonEscapeCharacter is "a". */ + // Canonical NonEscapeCharacter is "a". it("parses NonEscapeCharacter", function() { expect('start = "\\a"').toParseAs(literalGrammar("a", false)); - /* - * The negative predicate is impossible to test with PEG.js grammar - * structure. - */ + // The negative predicate is impossible to test with PEG.js grammar + // structure. }); - /* - * The EscapeCharacter rule is impossible to test with PEG.js grammar - * structure. - */ + // The EscapeCharacter rule is impossible to test with PEG.js grammar + // structure. - /* Canonical HexEscapeSequence is "xFF". */ + // Canonical HexEscapeSequence is "xFF". it("parses HexEscapeSequence", function() { expect('start = "\\xFF"').toParseAs(literalGrammar("\xFF", false)); }); - /* Canonical UnicodeEscapeSequence is "uFFFF". */ + // Canonical UnicodeEscapeSequence is "uFFFF". it("parses UnicodeEscapeSequence", function() { expect('start = "\\uFFFF"').toParseAs(literalGrammar("\uFFFF", false)); }); - /* Digit rules are not tested. */ + // Digit rules are not tested. - /* Canonical AnyMatcher is ".". */ + // Canonical AnyMatcher is ".". it("parses AnyMatcher", function() { expect('start = .').toParseAs(anyGrammar()); }); - /* Canonical CodeBlock is "{ code }". */ + // Canonical CodeBlock is "{ code }". it("parses CodeBlock", function() { expect('start = "abcd" { code }').toParseAs(actionGrammar(" code ")); }); - /* Canonical Code is " code ". */ + // Canonical Code is " code ". 
it("parses Code", function() { expect('start = "abcd" {a}' ).toParseAs(actionGrammar("a")); expect('start = "abcd" {abc}' ).toParseAs(actionGrammar("abc")); @@ -643,9 +639,9 @@ describe("PEG.js grammar parser", function() { expect('start = "abcd" {}}').toFailToParse(); }); - /* Unicode character category rules and token rules are not tested. */ + // Unicode character category rules and token rules are not tested. - /* Canonical __ is "\n". */ + // Canonical __ is "\n". it("parses __", function() { expect('start ="abcd"' ).toParseAs(trivialGrammar); expect('start = "abcd"' ).toParseAs(trivialGrammar); @@ -654,7 +650,7 @@ describe("PEG.js grammar parser", function() { expect('start = "abcd"' ).toParseAs(trivialGrammar); }); - /* Canonical _ is " ". */ + // Canonical _ is " ". it("parses _", function() { expect('a = "abcd"\r\nb = "efgh"' ).toParseAs(twoRuleGrammar); expect('a = "abcd" \r\nb = "efgh"' ).toParseAs(twoRuleGrammar); @@ -662,7 +658,7 @@ describe("PEG.js grammar parser", function() { expect('a = "abcd" \r\nb = "efgh"' ).toParseAs(twoRuleGrammar); }); - /* Canonical EOS is ";". */ + // Canonical EOS is ";". it("parses EOS", function() { expect('a = "abcd"\n;b = "efgh"' ).toParseAs(twoRuleGrammar); expect('a = "abcd" \r\nb = "efgh"' ).toParseAs(twoRuleGrammar); @@ -670,7 +666,7 @@ describe("PEG.js grammar parser", function() { expect('a = "abcd"\nb = "efgh"' ).toParseAs(twoRuleGrammar); }); - /* Canonical EOF is the end of input. */ + // Canonical EOF is the end of input. it("parses EOF", function() { expect('start = "abcd"\n').toParseAs(trivialGrammar); }); diff --git a/src/parser.pegjs b/src/parser.pegjs index b035699..0feb1fb 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -1,27 +1,25 @@ -/* - * PEG.js Grammar - * ============== - * - * PEG.js grammar syntax is designed to be simple, expressive, and similar to - * JavaScript where possible. 
This means that many rules, especially in the - * lexical part, are based on the grammar from ECMA-262, 5.1 Edition [1]. Some - * are directly taken or adapted from the JavaScript example grammar (see - * examples/javascript.pegjs). - * - * Limitations: - * - * * Non-BMP characters are completely ignored to avoid surrogate pair - * handling. - * - * * One can create identifiers containing illegal characters using Unicode - * escape sequences. For example, "abcd\u0020efgh" is not a valid - * identifier, but it is accepted by the parser. - * - * Both limitations could be resolved, but the costs would likely outweigh - * the benefits. - * - * [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm - */ +// PEG.js Grammar +// ============== +// +// PEG.js grammar syntax is designed to be simple, expressive, and similar to +// JavaScript where possible. This means that many rules, especially in the +// lexical part, are based on the grammar from ECMA-262, 5.1 Edition [1]. Some +// are directly taken or adapted from the JavaScript example grammar (see +// examples/javascript.pegjs). +// +// Limitations: +// +// * Non-BMP characters are completely ignored to avoid surrogate pair +// handling. +// +// * One can create identifiers containing illegal characters using Unicode +// escape sequences. For example, "abcd\u0020efgh" is not a valid +// identifier, but it is accepted by the parser. +// +// Both limitations could be resolved, but the costs would likely outweigh +// the benefits. +// +// [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm { const OPS_TO_PREFIXED_TYPES = { @@ -54,7 +52,7 @@ } } -/* ---- Syntactic Grammar ----- */ +// ---- Syntactic Grammar ----- Grammar = __ initializer:(Initializer __)? 
rules:(Rule __)+ { @@ -177,12 +175,10 @@ PrimaryExpression / RuleReferenceExpression / SemanticPredicateExpression / "(" __ expression:Expression __ ")" { - /* - * The purpose of the "group" AST node is just to isolate label scope. We - * don't need to put it around nodes that can't contain any labels or - * nodes that already isolate label scope themselves. This leaves us with - * "labeled" and "sequence". - */ + // The purpose of the "group" AST node is just to isolate label scope. We + // don't need to put it around nodes that can't contain any labels or + // nodes that already isolate label scope themselves. This leaves us with + // "labeled" and "sequence". return expression.type === 'labeled' || expression.type === 'sequence' ? { type: "group", expression: expression } : expression; @@ -206,7 +202,7 @@ SemanticPredicateOperator = "&" / "!" -/* ---- Lexical Grammar ----- */ +// ---- Lexical Grammar ----- SourceCharacter = . @@ -445,30 +441,28 @@ CodeBlock "code block" Code = $((![{}] SourceCharacter)+ / "{" Code "}")* -/* - * Unicode Character Categories - * - * Extracted from the following Unicode Character Database file: - * - * http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt - * - * Unix magic used: - * - * grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters - * cut -f1 -d " " | # Extract code points - * grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters - * sed -e 's/\.\./-/' | # Adjust formatting - * sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting - * tr -d '\n' # Join lines - * - * ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one - * at the time of writing. - * - * Non-BMP characters are completely ignored to avoid surrogate pair handling - * (detecting surrogate pairs isn't possible with a simple character class and - * other methods would degrade performance). 
I don't consider it a big deal as - * even parsers in JavaScript engines of common browsers seem to ignore them. - */ +// Unicode Character Categories +// +// Extracted from the following Unicode Character Database file: +// +// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedGeneralCategory.txt +// +// Unix magic used: +// +// grep "; $CATEGORY" DerivedGeneralCategory.txt | # Filter characters +// cut -f1 -d " " | # Extract code points +// grep -v '[0-9a-fA-F]\{5\}' | # Exclude non-BMP characters +// sed -e 's/\.\./-/' | # Adjust formatting +// sed -e 's/\([0-9a-fA-F]\{4\}\)/\\u\1/g' | # Adjust formatting +// tr -d '\n' # Join lines +// +// ECMA-262 allows using Unicode 3.0 or later, version 8.0.0 was the latest one +// at the time of writing. +// +// Non-BMP characters are completely ignored to avoid surrogate pair handling +// (detecting surrogate pairs isn't possible with a simple character class and +// other methods would degrade performance). I don't consider it a big deal as +// even parsers in JavaScript engines of common browsers seem to ignore them. 
// Letter, Lowercase Ll = [\u0061-\u007A\u00B5\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137-\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148-\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E-\u0180\u0183\u0185\u0188\u018C-\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA-\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9-\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC-\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF-\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F-\u0240\u0242\u0247\u0249\u024B\u024D\u024F-\u0293\u0295-\u02AF\u0371\u0373\u0377\u037B-\u037D\u0390\u03AC-\u03CE\u03D0-\u03D1\u03D5-\u03D7\u03D9\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F3\u03F5\u03F8\u03FB-\u03FC\u0430-\u045F\u0461\u0463\u0465\u0467\u0469\u046B\u046D\u046F\u0471\u0473\u0475\u0477\u0479\u047B\u047D\u047F\u0481\u048B\u048D\u048F\u0491\u0493\u0495\u0497\u0499\u049B\u049D\u049F\u04A1\u04A3\u04A5\u04A7\u04A9\u04AB\u04AD\u04AF\u04B1\u04B3\u04B5\u04B7\u04B9\u04BB\u04BD\u04BF\u04C2\u04C4\u04C6\u04C8\u04CA\u04CC\u04CE-\u04CF\u04D1\u04D3\u04D5\u04D7\u04D9\u04DB\u04DD\u04DF\u04E1\u04E3\u04E5\u04E7\u04E9\u04EB\u04ED\u04EF\u04F1\u04F3\u04F5\u04F7\u04F9\u04FB\u04FD\u04FF\u0501\u0503\u0505\u0507\u0509\u050B\u050D\u050F\u0511\u0513\u0515\u0517\u0519\u051B\u051D\u051F\u0521\u0523\u0525\u0527\u0529\u052B\u052D\u052F\u0561-\u0587\u13F8-\u13FD\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E2
7\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFF-\u1F07\u1F10-\u1F15\u1F20-\u1F27\u1F30-\u1F37\u1F40-\u1F45\u1F50-\u1F57\u1F60-\u1F67\u1F70-\u1F7D\u1F80-\u1F87\u1F90-\u1F97\u1FA0-\u1FA7\u1FB0-\u1FB4\u1FB6-\u1FB7\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FC7\u1FD0-\u1FD3\u1FD6-\u1FD7\u1FE0-\u1FE7\u1FF2-\u1FF4\u1FF6-\u1FF7\u210A\u210E-\u210F\u2113\u212F\u2134\u2139\u213C-\u213D\u2146-\u2149\u214E\u2184\u2C30-\u2C5E\u2C61\u2C65-\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73-\u2C74\u2C76-\u2C7B\u2C81\u2C83\u2C85\u2C87\u2C89\u2C8B\u2C8D\u2C8F\u2C91\u2C93\u2C95\u2C97\u2C99\u2C9B\u2C9D\u2C9F\u2CA1\u2CA3\u2CA5\u2CA7\u2CA9\u2CAB\u2CAD\u2CAF\u2CB1\u2CB3\u2CB5\u2CB7\u2CB9\u2CBB\u2CBD\u2CBF\u2CC1\u2CC3\u2CC5\u2CC7\u2CC9\u2CCB\u2CCD\u2CCF\u2CD1\u2CD3\u2CD5\u2CD7\u2CD9\u2CDB\u2CDD\u2CDF\u2CE1\u2CE3-\u2CE4\u2CEC\u2CEE\u2CF3\u2D00-\u2D25\u2D27\u2D2D\uA641\uA643\uA645\uA647\uA649\uA64B\uA64D\uA64F\uA651\uA653\uA655\uA657\uA659\uA65B\uA65D\uA65F\uA661\uA663\uA665\uA667\uA669\uA66B\uA66D\uA681\uA683\uA685\uA687\uA689\uA68B\uA68D\uA68F\uA691\uA693\uA695\uA697\uA699\uA69B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7
A7\uA7A9\uA7B5\uA7B7\uA7FA\uAB30-\uAB5A\uAB60-\uAB65\uAB70-\uABBF\uFB00-\uFB06\uFB13-\uFB17\uFF41-\uFF5A] @@ -503,7 +497,7 @@ Pc = [\u005F\u203F-\u2040\u2054\uFE33-\uFE34\uFE4D-\uFE4F\uFF3F] // Separator, Space Zs = [\u0020\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000] -/* Tokens */ +// Tokens BreakToken = "break" !IdentifierPart CaseToken = "case" !IdentifierPart @@ -542,7 +536,7 @@ VoidToken = "void" !IdentifierPart WhileToken = "while" !IdentifierPart WithToken = "with" !IdentifierPart -/* Skipped */ +// Skipped __ = (WhiteSpace / LineTerminatorSequence / Comment)* @@ -550,7 +544,7 @@ __ _ = (WhiteSpace / MultiLineCommentNoLineTerminator)* -/* Automatic Semicolon Insertion */ +// Automatic Semicolon Insertion EOS = __ ";"