Compare commits

...

10 Commits

  1. 3
      .eslintignore
  2. 2
      .eslintrc.json
  3. 4
      .gitignore
  4. 88
      README.md
  5. 72
      benchmark/benchmarks.js
  6. 197
      benchmark/index.js
  7. 218
      benchmark/runner.js
  8. 19
      benchmark/server
  9. 14
      experiments/run-test.js
  10. 5
      experiments/test.pegjs
  11. 103
      gulpfile.js
  12. 134
      lib/compiler/asts.js
  13. 123
      lib/compiler/index.js
  14. 95
      lib/compiler/js.js
  15. 72
      lib/compiler/opcodes.js
  16. 816
      lib/compiler/passes/generate-bytecode.js
  17. 2656
      lib/compiler/passes/generate-js.js
  18. 60
      lib/compiler/passes/remove-proxy-rules.js
  19. 78
      lib/compiler/passes/report-duplicate-labels.js
  20. 26
      lib/compiler/passes/report-duplicate-rules.js
  21. 50
      lib/compiler/passes/report-infinite-recursion.js
  22. 38
      lib/compiler/passes/report-infinite-repetition.js
  23. 22
      lib/compiler/passes/report-undefined-rules.js
  24. 114
      lib/compiler/visitor.js
  25. 16
      lib/grammar-error.js
  26. 1643
      lib/parser.js
  27. 91
      lib/peg.js
  28. 52
      notes.txt
  29. 44
      package.json
  30. 33
      src/parser.pegjs
  31. 315
      test/api/generated-parser-api.spec.js
  32. 374
      test/api/pegjs-api.spec.js
  33. 128
      test/api/plugin-api.spec.js
  34. 3091
      test/behavior/generated-parser-behavior.spec.js
  35. 2
      test/server
  36. 1288
      test/unit/compiler/passes/generate-bytecode.spec.js
  37. 164
      test/unit/compiler/passes/helpers.js
  38. 92
      test/unit/compiler/passes/remove-proxy-rules.spec.js
  39. 102
      test/unit/compiler/passes/report-duplicate-labels.spec.js
  40. 24
      test/unit/compiler/passes/report-duplicate-rules.spec.js
  41. 214
      test/unit/compiler/passes/report-infinite-recursion.spec.js
  42. 174
      test/unit/compiler/passes/report-infinite-repetition.spec.js
  43. 18
      test/unit/compiler/passes/report-undefined-rules.spec.js
  44. 1304
      test/unit/parser.spec.js
  45. 5757
      yarn.lock

3
.eslintignore

@ -0,0 +1,3 @@
lib/parser.js
test/vendor/**/*
benchmark/vendor/**/*

2
.eslintrc.json

@ -1,3 +1,3 @@
{
"extends": "dmajda"
"extends": "@joepie91/eslint-config"
}

4
.gitignore

@ -1,3 +1,3 @@
browser/*
browser
examples/*.js
node_modules/*
node_modules

88
README.md

@ -1,9 +1,6 @@
[![Build status](https://img.shields.io/travis/pegjs/pegjs.svg)](https://travis-ci.org/pegjs/pegjs)
[![npm version](https://img.shields.io/npm/v/pegjs.svg)](https://www.npmjs.com/package/pegjs)
[![Bower version](https://img.shields.io/bower/v/pegjs.svg)](https://github.com/pegjs/bower)
[![License](https://img.shields.io/badge/license-mit-blue.svg)](https://opensource.org/licenses/MIT)
PEG.js
PEG-Redux
======
PEG.js is a simple parser generator for JavaScript that produces fast parsers
@ -11,6 +8,10 @@ with excellent error reporting. You can use it to process complex data or
computer languages and build transformers, interpreters, compilers and other
tools easily.
PEG-Redux is a __work-in-progress__ fork of PEG.js, with the aim of continuing
maintenance on the PEG.js project, while adding support for modern features
such as modules.
Features
--------
@ -20,49 +21,48 @@ Features
* Based on [parsing expression
grammar](http://en.wikipedia.org/wiki/Parsing_expression_grammar) formalism
— more powerful than traditional LL(*k*) and LR(*k*) parsers
* Usable [from your browser](https://pegjs.org/online), from the command line,
or via JavaScript API
* Usable from your browser, from the command line, or via JavaScript API
Getting Started
---------------
Differences from the original PEG.js
--------
[Online version](https://pegjs.org/online) is the easiest way to generate a
parser. Just enter your grammar, try parsing a few inputs, and download generated
parser code.
* The plugin API has been dropped for now, as it was underspecified and not very commonly used. A new, more robust and extensive plugin API may come to exist in the future, if it turns out that there is a high demand for customizations that wouldn't fit into the PEG-Redux project itself.
* Bower and stand-alone browser builds have been discontinued. Please use a bundler (see below) instead.
* AMD, UMD and globals support have been discontinued. The generated parsers now only support CommonJS.
* Module support has been added, both for importing other PEG-Redux files and for `require()`ing JS modules.
Installation
------------
### Node.js
To use the `pegjs` command, install PEG.js globally:
To use the `pegjs` command, install PEG-Redux globally:
```console
$ npm install -g pegjs
$ npm install -g peg-redux
```
To use the JavaScript API, install PEG.js locally:
To use the JavaScript API, install PEG-Redux locally:
```console
$ npm install pegjs
$ npm install peg-redux
```
If you need both the `pegjs` command and the JavaScript API, install PEG.js both
ways.
If you need both the `pegjs` command and the JavaScript API, install PEG-Redux
both ways.
### Browser
[Download](https://pegjs.org/#download) the PEG.js library (regular or minified
version) or install it using Bower:
PEG-Redux works with bundlers such as [Browserify](http://browserify.org/), [Parcel](https://parceljs.org/) and [Webpack](https://webpack.js.org/).
```console
$ bower install pegjs
```
Simply `require()` and use the module like you would in Node.js. The one exception is that modules (either PEG-Redux or JavaScript modules) are not currently supported in browser environments.
Bower and standalone builds have been discontinued in this fork. Getting started with Browserify will only take a few minutes, and give you a better developer experience.
Generating a Parser
-------------------
PEG.js generates parser from a grammar that describes expected input and can
PEG-Redux generates a parser from a grammar that describes the expected input and can
specify what the parser returns (using semantic actions on matched parts of the
input). The generated parser itself is a JavaScript object with a simple API.
@ -100,26 +100,20 @@ You can tweak the generated parser with several options:
`peg.generate`
* `--extra-options-file` — file with additional options (in JSON format) to
pass to `peg.generate`
* `--format` — format of the generated parser: `amd`, `commonjs`, `globals`,
`umd` (default: `commonjs`)
* `--optimize` — selects between optimizing the generated parser for parsing
speed (`speed`) or code size (`size`) (default: `speed`)
* `--plugin` — makes PEG.js use a specified plugin (can be specified multiple
* `--plugin` — makes PEG-Redux use a specified plugin (can be specified multiple
times)
* `--trace` — makes the parser trace its progress
### JavaScript API
In Node.js, require the PEG.js parser generator module:
Require the PEG-Redux parser generator module:
```javascript
var peg = require("pegjs");
var peg = require("peg-redux");
```
In browser, include the PEG.js library in your web page or application using the
`<script>` tag. If PEG.js detects an AMD loader, it will define itself as a
module, otherwise the API will be available in the `peg` global object.
To generate a parser, call the `peg.generate` method and pass your grammar as a
parameter:
@ -142,14 +136,7 @@ object to `peg.generate`. The following options are supported:
`false`)
* `dependencies` — parser dependencies, the value is an object which maps
variables used to access the dependencies in the parser to module IDs used
to load them; valid only when `format` is set to `"amd"`, `"commonjs"`, or
`"umd"` (default: `{}`)
* `exportVar` — name of a global variable into which the parser object is
assigned to when no module loader is detected; valid only when `format` is
set to `"globals"` or `"umd"` (default: `null`)
* `format` — format of the generated parser (`"amd"`, `"bare"`, `"commonjs"`,
`"globals"`, or `"umd"`); valid only when `output` is set to `"source"`
(default: `"bare"`)
to load them.
* `optimize` — selects between optimizing the generated parser for parsing
speed (`"speed"`) or code size (`"size"`) (default: `"speed"`)
* `output` — if set to `"parser"`, the method will return generated parser
@ -503,25 +490,14 @@ environments:
* Safari
* Opera
However, please note that it is currently only actively tested in Node.js and Firefox. This will likely change in the future.
Development
-----------
* [Project website](https://pegjs.org/)
* [Wiki](https://github.com/pegjs/pegjs/wiki)
* [Source code](https://github.com/pegjs/pegjs)
* [Issue tracker](https://github.com/pegjs/pegjs/issues)
* [Google Group](http://groups.google.com/group/pegjs)
* [Twitter](http://twitter.com/peg_js)
PEG.js is developed by [David Majda](https://majda.cz/)
([@dmajda](http://twitter.com/dmajda)). The [Bower
package](https://github.com/pegjs/bower) is maintained by [Michel
Krämer](http://www.michel-kraemer.com/)
([@michelkraemer](https://twitter.com/michelkraemer)).
PEG-Redux is maintained by [Sven Slootweg (joepie91)](http://cryto.net/~joepie91).
The original PEG.js was developed by [David Majda](http://majda.cz/) ([@dmajda](http://twitter.com/dmajda)).
You are welcome to contribute code. Unless your contribution is really trivial
you should get in touch with me first — this can prevent wasted effort on both
sides. You can send code both as a patch or a GitHub pull request.
Note that PEG.js is still very much work in progress. There are no compatibility
guarantees until version 1.0.
sides. You can send code either as a patch or as a pull request.

72
benchmark/benchmarks.js

@ -1,42 +1,42 @@
"use strict";
let benchmarks = [
{
id: "json",
title: "JSON",
tests: [
{ file: "example1.json", title: "Example 1" },
{ file: "example2.json", title: "Example 2" },
{ file: "example3.json", title: "Example 3" },
{ file: "example4.json", title: "Example 4" },
{ file: "example5.json", title: "Example 5" }
]
},
{
id: "css",
title: "CSS",
tests: [
{ file: "blueprint/src/reset.css", title: "Blueprint - reset.css (source)" },
{ file: "blueprint/src/typography.css", title: "Blueprint - typography.css (source)" },
{ file: "blueprint/src/forms.css", title: "Blueprint - forms.css (source)" },
{ file: "blueprint/src/grid.css", title: "Blueprint - grid.css (source)" },
{ file: "blueprint/src/print.css", title: "Blueprint - print.css (source)" },
// Contains syntax errors.
// { file: "blueprint/src/ie.css", title: "Blueprint - ie.css (source)" },
{ file: "blueprint/min/screen.css", title: "Blueprint - screen.css (minified)" },
{ file: "blueprint/min/print.css", title: "Blueprint - print.css (minified)" },
// Contains syntax errors.
// { file: "blueprint/min/ie.css", title: "Blueprint - ie.css (minified)" },
{ file: "960.gs/src/reset.css", title: "960.gs - reset.css (source)" },
{ file: "960.gs/src/text.css", title: "960.gs - text.css (source)" },
{ file: "960.gs/src/960.css", title: "960.gs - 960.css (source)" },
{ file: "960.gs/src/960_24_col.css", title: "960.gs - 960_24_col.css (source)" },
{ file: "960.gs/min/reset.css", title: "960.gs - reset.css (minified)" },
{ file: "960.gs/min/text.css", title: "960.gs - text.css (minified)" },
{ file: "960.gs/min/960.css", title: "960.gs - 960.css (minified)" },
{ file: "960.gs/min/960_24_col.css", title: "960.gs - 960_24_col.css (minified)" }
]
}
{
id: "json",
title: "JSON",
tests: [
{ file: "example1.json", title: "Example 1" },
{ file: "example2.json", title: "Example 2" },
{ file: "example3.json", title: "Example 3" },
{ file: "example4.json", title: "Example 4" },
{ file: "example5.json", title: "Example 5" }
]
},
{
id: "css",
title: "CSS",
tests: [
{ file: "blueprint/src/reset.css", title: "Blueprint - reset.css (source)" },
{ file: "blueprint/src/typography.css", title: "Blueprint - typography.css (source)" },
{ file: "blueprint/src/forms.css", title: "Blueprint - forms.css (source)" },
{ file: "blueprint/src/grid.css", title: "Blueprint - grid.css (source)" },
{ file: "blueprint/src/print.css", title: "Blueprint - print.css (source)" },
// Contains syntax errors.
// { file: "blueprint/src/ie.css", title: "Blueprint - ie.css (source)" },
{ file: "blueprint/min/screen.css", title: "Blueprint - screen.css (minified)" },
{ file: "blueprint/min/print.css", title: "Blueprint - print.css (minified)" },
// Contains syntax errors.
// { file: "blueprint/min/ie.css", title: "Blueprint - ie.css (minified)" },
{ file: "960.gs/src/reset.css", title: "960.gs - reset.css (source)" },
{ file: "960.gs/src/text.css", title: "960.gs - text.css (source)" },
{ file: "960.gs/src/960.css", title: "960.gs - 960.css (source)" },
{ file: "960.gs/src/960_24_col.css", title: "960.gs - 960_24_col.css (source)" },
{ file: "960.gs/min/reset.css", title: "960.gs - reset.css (minified)" },
{ file: "960.gs/min/text.css", title: "960.gs - text.css (minified)" },
{ file: "960.gs/min/960.css", title: "960.gs - 960.css (minified)" },
{ file: "960.gs/min/960_24_col.css", title: "960.gs - 960_24_col.css (minified)" }
]
}
];
module.exports = benchmarks;

197
benchmark/index.js

@ -6,22 +6,22 @@ let Runner = require("./runner.js");
let benchmarks = require("./benchmarks.js");
$("#run").click(() => {
// Results Table Manipulation
// Results Table Manipulation
let resultsTable = $("#results-table");
let resultsTable = $("#results-table");
function appendHeading(heading) {
resultsTable.append(
"<tr class='heading'><th colspan='4'>" + heading + "</th></tr>"
);
}
function appendHeading(heading) {
resultsTable.append(
"<tr class='heading'><th colspan='4'>" + heading + "</th></tr>"
);
}
function appendResult(klass, title, url, inputSize, parseTime) {
const KB = 1024;
const MS_IN_S = 1000;
function appendResult(klass, title, url, inputSize, parseTime) {
const KB = 1024;
const MS_IN_S = 1000;
resultsTable.append(
"<tr class='" + klass + "'>"
resultsTable.append(
"<tr class='" + klass + "'>"
+ "<td class='title'>"
+ (url !== null ? "<a href='" + url + "'>" : "")
+ title
@ -46,93 +46,94 @@ $("#run").click(() => {
+ "&nbsp;<span class='unit'>kB/s</span>"
+ "</td>"
+ "</tr>"
);
}
// Main
// Each input is parsed multiple times and the results are averaged. We
// do this for two reasons:
//
// 1. To warm up the interpreter (PEG.js-generated parsers will be
// most likely used repeatedly, so it makes sense to measure
// performance after warming up).
//
// 2. To minimize random errors.
let runCount = parseInt($("#run-count").val(), 10);
let options = {
cache: $("#cache").is(":checked"),
optimize: $("#optimize").val()
};
if (isNaN(runCount) || runCount <= 0) {
alert("Number of runs must be a positive integer.");
return;
}
Runner.run(benchmarks, runCount, options, {
readFile(file) {
return $.ajax({
type: "GET",
url: file,
dataType: "text",
async: false
}).responseText;
},
testStart() {
// Nothing to do.
},
testFinish(benchmark, test, inputSize, parseTime) {
appendResult(
"individual",
test.title,
benchmark.id + "/" + test.file,
inputSize,
parseTime
);
},
benchmarkStart(benchmark) {
appendHeading(benchmark.title);
},
benchmarkFinish(benchmark, inputSize, parseTime) {
appendResult(
"benchmark-total",
benchmark.title + " total",
null,
inputSize,
parseTime
);
},
start() {
$("#run-count, #cache, #run").attr("disabled", "disabled");
resultsTable.show();
$("#results-table tr").slice(1).remove();
},
finish(inputSize, parseTime) {
appendResult(
"total",
"Total",
null,
inputSize,
parseTime
);
$.scrollTo("max", { axis: "y", duration: 500 });
$("#run-count, #cache, #run").removeAttr("disabled");
}
});
);
}
// Main
// Each input is parsed multiple times and the results are averaged. We
// do this for two reasons:
//
// 1. To warm up the interpreter (PEG.js-generated parsers will be
// most likely used repeatedly, so it makes sense to measure
// performance after warming up).
//
// 2. To minimize random errors.
let runCount = parseInt($("#run-count").val(), 10);
let options = {
cache: $("#cache").is(":checked"),
optimize: $("#optimize").val()
};
if (isNaN(runCount) || runCount <= 0) {
// eslint-disable-next-line no-alert
alert("Number of runs must be a positive integer.");
return;
}
Runner.run(benchmarks, runCount, options, {
readFile(file) {
return $.ajax({
type: "GET",
url: file,
dataType: "text",
async: false
}).responseText;
},
testStart() {
// Nothing to do.
},
testFinish(benchmark, test, inputSize, parseTime) {
appendResult(
"individual",
test.title,
benchmark.id + "/" + test.file,
inputSize,
parseTime
);
},
benchmarkStart(benchmark) {
appendHeading(benchmark.title);
},
benchmarkFinish(benchmark, inputSize, parseTime) {
appendResult(
"benchmark-total",
benchmark.title + " total",
null,
inputSize,
parseTime
);
},
start() {
$("#run-count, #cache, #run").attr("disabled", "disabled");
resultsTable.show();
$("#results-table tr").slice(1).remove();
},
finish(inputSize, parseTime) {
appendResult(
"total",
"Total",
null,
inputSize,
parseTime
);
$.scrollTo("max", { axis: "y", duration: 500 });
$("#run-count, #cache, #run").removeAttr("disabled");
}
});
});
$(document).ready(() => {
$("#run").focus();
$("#run").focus();
});

218
benchmark/runner.js

@ -1,118 +1,116 @@
"use strict";
/* global setTimeout */
let peg = require("../lib/peg");
let Runner = {
// Runs every benchmark in |benchmarks| and reports progress through
// |callbacks|.
//
// Parameters:
//
//   * |benchmarks| - array of benchmark objects; this method reads |id| and
//     |tests| from each benchmark and |file| from each test.
//   * |runCount| - how many times each input is parsed; the reported parse
//     time of a test is the total divided by this number.
//   * |options| - passed unchanged to |peg.generate|.
//   * |callbacks| - object providing |start()|, |benchmarkStart(benchmark)|,
//     |readFile(path)|, |testStart(benchmark, test)|,
//     |testFinish(benchmark, test, inputSize, averageParseTime)|,
//     |benchmarkFinish(benchmark, inputSize, parseTime)| and
//     |finish(totalInputSize, totalParseTime)|. The result of |readFile| is
//     used immediately, so it has to return the content synchronously.
//
// Times are differences of |Date#getTime| values, i.e. milliseconds. Sizes
// are the |length| of the value returned by |readFile|.
//
// Only the first queued step runs inside this call; the remaining steps run
// from |setTimeout| callbacks, so the method returns before the benchmarks
// have finished.
run(benchmarks, runCount, options, callbacks) {
// Queue
//
// FIFO list of zero-argument functions. |run| executes the oldest one and
// then re-schedules itself until the list is empty.
let Q = {
functions: [],
add(f) {
this.functions.push(f);
},
run() {
if (this.functions.length > 0) {
this.functions.shift()();
// We can't use |arguments.callee| here because |this| would get
// messed up in that case.
setTimeout(() => { Q.run(); }, 0);
}
}
};
// The benchmark itself is factored out into several functions (some of them
// generated), which are enqueued and run one by one using |setTimeout|. We
// do this for two reasons:
//
// 1. To avoid browser mechanism for interrupting long-running scripts to
// kick-in (or at least to not kick-in that often).
//
// 2. To ensure progressive rendering of results in the browser (some
// browsers do not render at all when running JavaScript code).
//
// The enqueued functions share state, which is all stored in the properties
// of the |state| object.
let state = {};
// First step: notifies the caller and resets the grand totals.
function initialize() {
callbacks.start();
state.totalInputSize = 0;
state.totalParseTime = 0;
}
// Returns the step that starts one benchmark: it generates the parser from
// the grammar at "../examples/<benchmark.id>.pegjs" and resets the
// per-benchmark totals.
function benchmarkInitializer(benchmark) {
return function() {
callbacks.benchmarkStart(benchmark);
state.parser = peg.generate(
callbacks.readFile("../examples/" + benchmark.id + ".pegjs"),
options
);
state.benchmarkInputSize = 0;
state.benchmarkParseTime = 0;
};
}
// Returns the step that runs one test: it parses the input |runCount| times
// with the parser stored in |state.parser| and adds the input size and the
// average parse time to the per-benchmark totals.
function testRunner(benchmark, test) {
return function() {
callbacks.testStart(benchmark, test);
let input = callbacks.readFile(benchmark.id + "/" + test.file);
let parseTime = 0;
for (let i = 0; i < runCount; i++) {
let t = (new Date()).getTime();
state.parser.parse(input);
parseTime += (new Date()).getTime() - t;
}
let averageParseTime = parseTime / runCount;
callbacks.testFinish(benchmark, test, input.length, averageParseTime);
state.benchmarkInputSize += input.length;
state.benchmarkParseTime += averageParseTime;
};
}
// Returns the step that closes one benchmark: it reports the per-benchmark
// totals and folds them into the grand totals.
function benchmarkFinalizer(benchmark) {
return function() {
callbacks.benchmarkFinish(
benchmark,
state.benchmarkInputSize,
state.benchmarkParseTime
);
state.totalInputSize += state.benchmarkInputSize;
state.totalParseTime += state.benchmarkParseTime;
};
}
// Last step: reports the grand totals.
function finalize() {
callbacks.finish(state.totalInputSize, state.totalParseTime);
}
// Main
//
// Steps are enqueued in execution order: initialize, then for each benchmark
// its initializer, one runner per test and its finalizer, then finalize.
Q.add(initialize);
benchmarks.forEach(benchmark => {
Q.add(benchmarkInitializer(benchmark));
benchmark.tests.forEach(test => {
Q.add(testRunner(benchmark, test));
});
Q.add(benchmarkFinalizer(benchmark));
});
Q.add(finalize);
Q.run();
}
// Runs every benchmark in |benchmarks| and reports progress through
// |callbacks|.
//
// Parameters:
//
//   * |benchmarks| - array of benchmark objects; this method reads |id| and
//     |tests| from each benchmark and |file| from each test.
//   * |runCount| - how many times each input is parsed; the reported parse
//     time of a test is the total divided by this number.
//   * |options| - passed unchanged to |peg.generate|.
//   * |callbacks| - object providing |start()|, |benchmarkStart(benchmark)|,
//     |readFile(path)|, |testStart(benchmark, test)|,
//     |testFinish(benchmark, test, inputSize, averageParseTime)|,
//     |benchmarkFinish(benchmark, inputSize, parseTime)| and
//     |finish(totalInputSize, totalParseTime)|. The result of |readFile| is
//     used immediately, so it has to return the content synchronously.
//
// Times are differences of |Date#getTime| values, i.e. milliseconds. Sizes
// are the |length| of the value returned by |readFile|.
//
// Only the first queued step runs inside this call; the remaining steps run
// from |setTimeout| callbacks, so the method returns before the benchmarks
// have finished.
run(benchmarks, runCount, options, callbacks) {
// Queue
//
// FIFO list of zero-argument functions. |run| executes the oldest one and
// then re-schedules itself until the list is empty.
let Q = {
functions: [],
add(f) {
this.functions.push(f);
},
run() {
if (this.functions.length > 0) {
this.functions.shift()();
// We can't use |arguments.callee| here because |this| would get
// messed up in that case.
setTimeout(() => { Q.run(); }, 0);
}
}
};
// The benchmark itself is factored out into several functions (some of them
// generated), which are enqueued and run one by one using |setTimeout|. We
// do this for two reasons:
//
// 1. To avoid browser mechanism for interrupting long-running scripts to
// kick-in (or at least to not kick-in that often).
//
// 2. To ensure progressive rendering of results in the browser (some
// browsers do not render at all when running JavaScript code).
//
// The enqueued functions share state, which is all stored in the properties
// of the |state| object.
let state = {};
// First step: notifies the caller and resets the grand totals.
function initialize() {
callbacks.start();
state.totalInputSize = 0;
state.totalParseTime = 0;
}
// Returns the step that starts one benchmark: it generates the parser from
// the grammar at "../examples/<benchmark.id>.pegjs" and resets the
// per-benchmark totals.
function benchmarkInitializer(benchmark) {
return function() {
callbacks.benchmarkStart(benchmark);
state.parser = peg.generate(
callbacks.readFile("../examples/" + benchmark.id + ".pegjs"),
options
);
state.benchmarkInputSize = 0;
state.benchmarkParseTime = 0;
};
}
// Returns the step that runs one test: it parses the input |runCount| times
// with the parser stored in |state.parser| and adds the input size and the
// average parse time to the per-benchmark totals.
function testRunner(benchmark, test) {
return function() {
callbacks.testStart(benchmark, test);
let input = callbacks.readFile(benchmark.id + "/" + test.file);
let parseTime = 0;
for (let i = 0; i < runCount; i++) {
let t = (new Date()).getTime();
state.parser.parse(input);
parseTime += (new Date()).getTime() - t;
}
let averageParseTime = parseTime / runCount;
callbacks.testFinish(benchmark, test, input.length, averageParseTime);
state.benchmarkInputSize += input.length;
state.benchmarkParseTime += averageParseTime;
};
}
// Returns the step that closes one benchmark: it reports the per-benchmark
// totals and folds them into the grand totals.
function benchmarkFinalizer(benchmark) {
return function() {
callbacks.benchmarkFinish(
benchmark,
state.benchmarkInputSize,
state.benchmarkParseTime
);
state.totalInputSize += state.benchmarkInputSize;
state.totalParseTime += state.benchmarkParseTime;
};
}
// Last step: reports the grand totals.
function finalize() {
callbacks.finish(state.totalInputSize, state.totalParseTime);
}
// Main
//
// Steps are enqueued in execution order: initialize, then for each benchmark
// its initializer, one runner per test and its finalizer, then finalize.
Q.add(initialize);
benchmarks.forEach(benchmark => {
Q.add(benchmarkInitializer(benchmark));
benchmark.tests.forEach(test => {
Q.add(testRunner(benchmark, test));
});
Q.add(benchmarkFinalizer(benchmark));
});
Q.add(finalize);
Q.run();
}
};
module.exports = Runner;

19
benchmark/server

@ -21,16 +21,17 @@ app.use(express.static(__dirname));
app.use("/examples", express.static(`${__dirname}/../examples`));
app.get("/bundle.js", (req, res) => {
let files = glob.sync(`${__dirname}/**/*.js`, {
ignore: `${__dirname}/vendor/**/*`
});
browserify(files)
.transform(babelify, { presets: "es2015", compact: false })
.bundle()
.pipe(res);
let files = glob.sync(`${__dirname}/**/*.js`, {
ignore: `${__dirname}/vendor/**/*`
});
browserify(files)
.transform(babelify, { presets: "env", compact: false })
.bundle()
.pipe(res);
});
app.listen(8000, () => {
console.log("Benchmark server running at http://localhost:8000...");
// eslint-disable-next-line no-console
console.log("Benchmark server running at http://localhost:8000...");
});

14
experiments/run-test.js

@ -0,0 +1,14 @@
"use strict";
const fs = require("fs");
const path = require("path");
const util = require("util");
const generate = require("../").generate;
const parser = require("../lib/parser");
// Read the sample grammar that sits next to this script, run it through
// |generate| and print the complete result.
const grammarPath = path.join(__dirname, "test.pegjs");
const grammar = fs.readFileSync(grammarPath, { encoding: "utf-8" });
// Alternative kept for experimenting with the raw parser output:
// let parseResult = parser.parse(grammar);
const parseResult = generate(grammar);
const inspectOptions = { depth: null, colors: true };
console.log(util.inspect(parseResult, inspectOptions));

5
experiments/test.pegjs

@ -0,0 +1,5 @@
import Foo from "./util.pegjs"
import { CommaDelimited as DelimitedNumber } from "./delimited-number"
TopLevelRule
= "hello"

103
gulpfile.js

@ -1,108 +1,21 @@
"use strict";
/* eslint-env node */
let babelify = require("babelify");
let browserify = require("browserify");
let buffer = require("vinyl-buffer");
let del = require("del");
let eslint = require("gulp-eslint");
let gulp = require("gulp");
let header = require("gulp-header");
let mocha = require("gulp-mocha");
let package_ = require("./package");
let peg = require("./lib/peg");
let rename = require("gulp-rename");
let runSequence = require("run-sequence");
let source = require("vinyl-source-stream");
let spawn = require("child_process").spawn;
let transform = require("gulp-transform");
let uglify = require("gulp-uglify");
const HEADER = [
"// PEG.js " + package_.version,
"//",
"// https://pegjs.org/",
"//",
"// Copyright (c) 2010-2016 David Majda",
"// Licensed under the MIT License.",
""
].map(line => `${line}\n`).join("");
const JS_FILES = [
"lib/**/*.js",
"!lib/parser.js",
"test/**/*.js",
"test/server",
"!test/vendor/**/*",
"benchmark/**/*.js",
"benchmark/run",
"benchmark/server",
"!benchmark/vendor/**/*",
"bin/pegjs",
"gulpfile.js"
];
const TEST_FILES = [
"test/**/*.js",
"!test/vendor/**/*"
];
// Turns the grammar held in |contents| into parser source code.
//
// |contents| may be anything whose |toString()| yields the grammar text. The
// text is handed to |peg.generate| with |output: "source"| and
// |format: "commonjs"|, and whatever that call returns is returned unchanged.
function generate(contents) {
  return peg.generate(contents.toString(), {
    output: "source",
    format: "commonjs"
  });
}
// Run ESLint on all JavaScript files.
gulp.task("lint", () =>
gulp.src(JS_FILES)
.pipe(eslint())
.pipe(eslint.format())
.pipe(eslint.failAfterError())
);
// Run tests.
gulp.task("test", () =>
gulp.src(TEST_FILES, { read: false })
.pipe(mocha())
);
// Run benchmarks.
gulp.task("benchmark", () =>
spawn("benchmark/run", { stdio: "inherit" })
);
// Create the browser build.
gulp.task("browser:build", () =>
browserify("lib/peg.js", { standalone: "peg" })
.transform(babelify, { presets: "es2015", compact: false })
.bundle()
.pipe(source("peg.js"))
.pipe(header(HEADER))
.pipe(gulp.dest("browser"))
.pipe(rename({ suffix: ".min" }))
.pipe(buffer())
.pipe(uglify())
.pipe(header(HEADER))
.pipe(gulp.dest("browser"))
);
// Delete the browser build.
gulp.task("browser:clean", () =>
del("browser")
);
// Generate the grammar parser.
gulp.task("parser", () =>
gulp.src("src/parser.pegjs")
.pipe(transform(generate))
.pipe(rename({ extname: ".js" }))
.pipe(gulp.dest("lib"))
);
// Default task.
gulp.task("default", cb =>
runSequence("lint", "test", cb)
gulp.src("src/parser.pegjs")
.pipe(transform("utf8", generate))
.pipe(rename({ extname: ".js" }))
.pipe(gulp.dest("lib"))
);

134
lib/compiler/asts.js

@ -4,73 +4,73 @@ let visitor = require("./visitor");
// AST utilities.
let asts = {
findRule(ast, name) {
for (let i = 0; i < ast.rules.length; i++) {
if (ast.rules[i].name === name) {
return ast.rules[i];
}
}
return undefined;
},
indexOfRule(ast, name) {
for (let i = 0; i < ast.rules.length; i++) {
if (ast.rules[i].name === name) {
return i;
}
}
return -1;
},
// Returns the result of applying the "consumes" visitor below to |node|; the
// per-type handlers encode whether a successful match of that node type is
// guaranteed to consume input.
//
// |ast| is the grammar AST and is only used to resolve |rule_ref| nodes by
// name.
alwaysConsumesOnSuccess(ast, node) {
// Constant answers shared by several node types.
function consumesTrue() { return true; }
function consumesFalse() { return false; }
// Wrapper nodes: the answer is the answer for the wrapped expression.
function consumesExpression(node) {
return consumes(node.expression);
}
// |visitor.build| is given one handler per node type. The handlers call
// |consumes| recursively, so it is presumably a function that dispatches on
// the type of the node it is given (confirm against the visitor module).
let consumes = visitor.build({
rule: consumesExpression,
named: consumesExpression,
// A choice consumes only if every alternative does.
choice(node) {
return node.alternatives.every(consumes);
},
action: consumesExpression,
// A sequence consumes as soon as one of its elements does.
sequence(node) {
return node.elements.some(consumes);
},
labeled: consumesExpression,
text: consumesExpression,
simple_and: consumesFalse,
simple_not: consumesFalse,
optional: consumesFalse,
zero_or_more: consumesFalse,
one_or_more: consumesExpression,
group: consumesExpression,
semantic_and: consumesFalse,
semantic_not: consumesFalse,
// NOTE(review): |findRule| returns |undefined| for an unknown name;
// presumably undefined rules are rejected before this runs (confirm).
rule_ref(node) {
return consumes(asts.findRule(ast, node.name));
},
// An empty literal matches without consuming anything.
literal(node) {
return node.value !== "";
},
class: consumesTrue,
any: consumesTrue
});
return consumes(node);
}
findRule(ast, name) {
for (let i = 0; i < ast.rules.length; i++) {
if (ast.rules[i].name === name) {
return ast.rules[i];
}
}
return undefined;
},
indexOfRule(ast, name) {
for (let i = 0; i < ast.rules.length; i++) {
if (ast.rules[i].name === name) {
return i;
}
}
return -1;
},
// Returns the result of applying the "consumes" visitor below to |node|; the
// per-type handlers encode whether a successful match of that node type is
// guaranteed to consume input.
//
// |ast| is the grammar AST and is only used to resolve |rule_ref| nodes by
// name.
alwaysConsumesOnSuccess(ast, node) {
// Constant answers shared by several node types.
function consumesTrue() { return true; }
function consumesFalse() { return false; }
// Wrapper nodes: the answer is the answer for the wrapped expression.
function consumesExpression(node) {
return consumes(node.expression);
}
// |visitor.build| is given one handler per node type. The handlers call
// |consumes| recursively, so it is presumably a function that dispatches on
// the type of the node it is given (confirm against the visitor module).
let consumes = visitor.build({
rule: consumesExpression,
named: consumesExpression,
// A choice consumes only if every alternative does.
choice(node) {
return node.alternatives.every(consumes);
},
action: consumesExpression,
// A sequence consumes as soon as one of its elements does.
sequence(node) {
return node.elements.some(consumes);
},
labeled: consumesExpression,
text: consumesExpression,
simple_and: consumesFalse,
simple_not: consumesFalse,
optional: consumesFalse,
zero_or_more: consumesFalse,
one_or_more: consumesExpression,
group: consumesExpression,
semantic_and: consumesFalse,
semantic_not: consumesFalse,
// NOTE(review): |findRule| returns |undefined| for an unknown name;
// presumably undefined rules are rejected before this runs (confirm).
rule_ref(node) {
return consumes(asts.findRule(ast, node.name));
},
// An empty literal matches without consuming anything.
literal(node) {
return node.value !== "";
},
class: consumesTrue,
any: consumesTrue
});
return consumes(node);
}
};
module.exports = asts;

123
lib/compiler/index.js

@ -11,81 +11,68 @@ let reportUndefinedRules = require("./passes/report-undefined-rules");
let visitor = require("./visitor");
// Returns a new object holding every own enumerable property of |defaults|,
// overridden by every own enumerable property of |options|. Neither argument
// is modified.
//
// An option that is present with the value |undefined| still overrides its
// default, because presence (not value) decides which side wins.
function processOptions(options, defaults) {
  return Object.assign({}, defaults, options);
}
let compiler = {
// AST node visitor builder. Useful mainly for plugins which manipulate the
// AST.
visitor: visitor,
// Compiler passes.
//
// Each pass is a function that is passed the AST. It can perform checks on it
// or modify it as needed. If the pass encounters a semantic error, it throws
// |peg.GrammarError|.
passes: {
check: {
reportUndefinedRules: reportUndefinedRules,
reportDuplicateRules: reportDuplicateRules,
reportDuplicateLabels: reportDuplicateLabels,
reportInfiniteRecursion: reportInfiniteRecursion,
reportInfiniteRepetition: reportInfiniteRepetition
},
transform: {
removeProxyRules: removeProxyRules
},
generate: {
generateBytecode: generateBytecode,
generateJS: generateJS
}
},
// Generates a parser from a specified grammar AST. Throws |peg.GrammarError|
// if the AST contains a semantic error. Note that not all errors are detected
// during the generation and some may protrude to the generated parser and
// cause its malfunction.
compile(ast, passes, options) {
options = options !== undefined ? options : {};
options = processOptions(options, {
allowedStartRules: [ast.rules[0].name],
cache: false,
dependencies: {},
exportVar: null,
format: "bare",
optimize: "speed",
output: "parser",
trace: false
});
// AST node visitor builder. Useful mainly for plugins which manipulate the
// AST.
visitor: visitor,
Object.keys(passes).forEach(stage => {
passes[stage].forEach(p => { p(ast, options); });
});
// Compiler passes.
//
// Each pass is a function that is passed the AST. It can perform checks on it
// or modify it as needed. If the pass encounters a semantic error, it throws
// |peg.GrammarError|.
passes: {
check: {
reportUndefinedRules: reportUndefinedRules,
reportDuplicateRules: reportDuplicateRules,
reportDuplicateLabels: reportDuplicateLabels,
reportInfiniteRecursion: reportInfiniteRecursion,
reportInfiniteRepetition: reportInfiniteRepetition
},
transform: {
removeProxyRules: removeProxyRules
},
generate: {
generateBytecode: generateBytecode,
generateJS: generateJS
}
},
switch (options.output) {
case "parser":
return eval(ast.code);
// Generates a parser from a specified grammar AST. Throws |peg.GrammarError|
// if the AST contains a semantic error. Note that not all errors are detected
// during the generation and some may protrude to the generated parser and
// cause its malfunction.
compile(ast, passes, options = {}) {
let processedOptions = processOptions(options, {
allowedStartRules: [ast.rules[0].name],
cache: false,
dependencies: {},
exportVar: null,
format: "bare",
optimize: "speed",
output: "parser",
trace: false
});
case "source":
return ast.code;
Object.values(passes).forEach((stagePasses) => {
stagePasses.forEach(pass => { pass(ast, processedOptions); });
});
default:
throw new Error("Invalid output format: " + options.output + ".");
}
}
switch (processedOptions.output) {
case "parser":
return eval(ast.code);
case "source":
return ast.code;
// FIXME: Move to Validatem code at entrypoint
default:
throw new Error("Invalid output format: " + processedOptions.output + ".");
}
}
};
module.exports = compiler;

95
lib/compiler/js.js

@ -1,54 +1,59 @@
"use strict";
// Returns the first UTF-16 code unit of |ch| written as upper-case
// hexadecimal digits, without zero padding.
function hex(ch) {
  const codeUnit = ch.charCodeAt(0);
  return codeUnit.toString(16).toUpperCase();
}
// Converts the first UTF-16 code unit of |character| to uppercase hexadecimal
// digits (no padding, no prefix).
function hex(character) {
  const lowercaseDigits = character.charCodeAt(0).toString(16);
  return lowercaseDigits.toUpperCase();
}
// JavaScript code generation helpers.
let js = {
stringEscape(s) {
// ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
// literal except for the closing quote character, backslash, carriage
// return, line separator, paragraph separator, and line feed. Any character
// may appear in the form of an escape sequence.
//
// For portability, we also escape all control and non-ASCII characters.
return s
.replace(/\\/g, "\\\\") // backslash
.replace(/"/g, "\\\"") // closing double quote
.replace(/\0/g, "\\0") // null
.replace(/\x08/g, "\\b") // backspace
.replace(/\t/g, "\\t") // horizontal tab
.replace(/\n/g, "\\n") // line feed
.replace(/\v/g, "\\v") // vertical tab
.replace(/\f/g, "\\f") // form feed
.replace(/\r/g, "\\r") // carriage return
.replace(/[\x00-\x0F]/g, ch => "\\x0" + hex(ch))
.replace(/[\x10-\x1F\x7F-\xFF]/g, ch => "\\x" + hex(ch))
.replace(/[\u0100-\u0FFF]/g, ch => "\\u0" + hex(ch))
.replace(/[\u1000-\uFFFF]/g, ch => "\\u" + hex(ch));
},
stringEscape(s) {
// ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
// literal except for the closing quote character, backslash, carriage
// return, line separator, paragraph separator, and line feed. Any character
// may appear in the form of an escape sequence.
//
// For portability, we also escape all control and non-ASCII characters.
return s
.replace(/\\/g, "\\\\") // backslash
.replace(/"/g, "\\\"") // closing double quote
.replace(/\0/g, "\\0") // null
.replace(/\x08/g, "\\b") // backspace
.replace(/\t/g, "\\t") // horizontal tab
.replace(/\n/g, "\\n") // line feed
.replace(/\v/g, "\\v") // vertical tab
.replace(/\f/g, "\\f") // form feed
.replace(/\r/g, "\\r") // carriage return
.replace(/[\x00-\x0F]/g, ch => "\\x0" + hex(ch))
.replace(/[\x10-\x1F\x7F-\xFF]/g, ch => "\\x" + hex(ch))
.replace(/[\u0100-\u0FFF]/g, ch => "\\u0" + hex(ch))
.replace(/[\u1000-\uFFFF]/g, ch => "\\u" + hex(ch));
},
regexpClassEscape(s) {
// Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
//
// For portability, we also escape all control and non-ASCII characters.
return s
.replace(/\\/g, "\\\\") // backslash
.replace(/\//g, "\\/") // closing slash
.replace(/]/g, "\\]") // closing bracket
.replace(/\^/g, "\\^") // caret
.replace(/-/g, "\\-") // dash
.replace(/\0/g, "\\0") // null
.replace(/\x08/g, "\\b") // backspace
.replace(/\t/g, "\\t") // horizontal tab
.replace(/\n/g, "\\n") // line feed
.replace(/\v/g, "\\v") // vertical tab
.replace(/\f/g, "\\f") // form feed
.replace(/\r/g, "\\r") // carriage return
.replace(/[\x00-\x0F]/g, ch => "\\x0" + hex(ch))
.replace(/[\x10-\x1F\x7F-\xFF]/g, ch => "\\x" + hex(ch))
.replace(/[\u0100-\u0FFF]/g, ch => "\\u0" + hex(ch))
.replace(/[\u1000-\uFFFF]/g, ch => "\\u" + hex(ch));
}
regexpClassEscape(s) {
// Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
//
// For portability, we also escape all control and non-ASCII characters.
return s
.replace(/\\/g, "\\\\") // backslash
.replace(/\//g, "\\/") // closing slash
.replace(/]/g, "\\]") // closing bracket
.replace(/\^/g, "\\^") // caret
.replace(/-/g, "\\-") // dash
.replace(/\0/g, "\\0") // null
.replace(/\x08/g, "\\b") // backspace
.replace(/\t/g, "\\t") // horizontal tab
.replace(/\n/g, "\\n") // line feed
.replace(/\v/g, "\\v") // vertical tab
.replace(/\f/g, "\\f") // form feed
.replace(/\r/g, "\\r") // carriage return
.replace(/[\x00-\x0F]/g, ch => "\\x0" + hex(ch))
.replace(/[\x10-\x1F\x7F-\xFF]/g, ch => "\\x" + hex(ch))
.replace(/[\u0100-\u0FFF]/g, ch => "\\u0" + hex(ch))
.replace(/[\u1000-\uFFFF]/g, ch => "\\u" + hex(ch));
}
};
module.exports = js;

72
lib/compiler/opcodes.js

@ -2,53 +2,53 @@
// Bytecode instruction opcodes.
//
// Maps each instruction mnemonic to the number that stands for it. The comment
// after each entry repeats the mnemonic followed by what appear to be the
// names of the instruction's operands.
//
// NOTE(review): every entry below is listed twice with the same value, which
// looks like the before/after sides of a whitespace-only diff -- confirm
// against the real file before relying on this listing.
let opcodes = {
// Stack Manipulation
// Stack Manipulation
PUSH: 0, // PUSH c
PUSH_UNDEFINED: 1, // PUSH_UNDEFINED
PUSH_NULL: 2, // PUSH_NULL
PUSH_FAILED: 3, // PUSH_FAILED
PUSH_EMPTY_ARRAY: 4, // PUSH_EMPTY_ARRAY
PUSH_CURR_POS: 5, // PUSH_CURR_POS
POP: 6, // POP
POP_CURR_POS: 7, // POP_CURR_POS
POP_N: 8, // POP_N n
NIP: 9, // NIP
APPEND: 10, // APPEND
WRAP: 11, // WRAP n
TEXT: 12, // TEXT
PUSH: 0, // PUSH c
PUSH_UNDEFINED: 1, // PUSH_UNDEFINED
PUSH_NULL: 2, // PUSH_NULL
PUSH_FAILED: 3, // PUSH_FAILED
PUSH_EMPTY_ARRAY: 4, // PUSH_EMPTY_ARRAY
PUSH_CURR_POS: 5, // PUSH_CURR_POS
POP: 6, // POP
POP_CURR_POS: 7, // POP_CURR_POS
POP_N: 8, // POP_N n
NIP: 9, // NIP
APPEND: 10, // APPEND
WRAP: 11, // WRAP n
TEXT: 12, // TEXT
// Conditions and Loops
// Conditions and Loops
IF: 13, // IF t, f
IF_ERROR: 14, // IF_ERROR t, f
IF_NOT_ERROR: 15, // IF_NOT_ERROR t, f
WHILE_NOT_ERROR: 16, // WHILE_NOT_ERROR b
IF: 13, // IF t, f
IF_ERROR: 14, // IF_ERROR t, f
IF_NOT_ERROR: 15, // IF_NOT_ERROR t, f
WHILE_NOT_ERROR: 16, // WHILE_NOT_ERROR b
// Matching
// Matching
MATCH_ANY: 17, // MATCH_ANY a, f, ...
MATCH_STRING: 18, // MATCH_STRING s, a, f, ...
MATCH_STRING_IC: 19, // MATCH_STRING_IC s, a, f, ...
MATCH_REGEXP: 20, // MATCH_REGEXP r, a, f, ...
ACCEPT_N: 21, // ACCEPT_N n
ACCEPT_STRING: 22, // ACCEPT_STRING s
FAIL: 23, // FAIL e
MATCH_ANY: 17, // MATCH_ANY a, f, ...
MATCH_STRING: 18, // MATCH_STRING s, a, f, ...
MATCH_STRING_IC: 19, // MATCH_STRING_IC s, a, f, ...
MATCH_REGEXP: 20, // MATCH_REGEXP r, a, f, ...
ACCEPT_N: 21, // ACCEPT_N n
ACCEPT_STRING: 22, // ACCEPT_STRING s
FAIL: 23, // FAIL e
// Calls
// Calls
LOAD_SAVED_POS: 24, // LOAD_SAVED_POS p
UPDATE_SAVED_POS: 25, // UPDATE_SAVED_POS
CALL: 26, // CALL f, n, pc, p1, p2, ..., pN
LOAD_SAVED_POS: 24, // LOAD_SAVED_POS p
UPDATE_SAVED_POS: 25, // UPDATE_SAVED_POS
CALL: 26, // CALL f, n, pc, p1, p2, ..., pN
// Rules
// Rules
RULE: 27, // RULE r
RULE: 27, // RULE r
// Failure Reporting
// Failure Reporting
SILENT_FAILS_ON: 28, // SILENT_FAILS_ON
SILENT_FAILS_OFF: 29 // SILENT_FAILS_OFF
SILENT_FAILS_ON: 28, // SILENT_FAILS_ON
SILENT_FAILS_OFF: 29 // SILENT_FAILS_OFF
};
module.exports = opcodes;

816
lib/compiler/passes/generate-bytecode.js

@ -188,431 +188,431 @@ let visitor = require("../visitor");
//
// silentFails--;
function generateBytecode(ast) {
let consts = [];
// Interns |value| in the enclosing |consts| list and returns its index. A value
// that |indexOf| already finds there is reused instead of being added again.
function addConst(value) {
  const existing = consts.indexOf(value);

  if (existing !== -1) {
    return existing;
  }

  consts.push(value);
  return consts.length - 1;
}
// Builds the source text of an anonymous function taking |params| (joined with
// ", ") whose body is |code|, registers that text through |addConst|, and
// returns whatever index |addConst| reports.
function addFunctionConst(params, code) {
  const parameterList = params.join(", ");
  const source = `function(${parameterList}) {${code}}`;

  return addConst(source);
}
// Returns a new plain object holding the same own enumerable keys and values
// as |env| (a shallow copy), so the copy can be changed without touching |env|.
function cloneEnv(env) {
  const copy = {};

  for (const label of Object.keys(env)) {
    copy[label] = env[label];
  }

  return copy;
}
// Concatenates any number of code fragments into one new flat array, in
// argument order. Array arguments are flattened one level (as |concat| does);
// calling it with no arguments yields an empty array.
//
// Uses a rest parameter instead of the |arguments| object so the variadic
// contract is visible in the signature.
function buildSequence(...parts) {
  return [].concat(...parts);
}
// Lays out a conditional as: |condCode|, then the lengths of the two branches,
// then |thenCode|, then |elseCode|. Returns a new array; inputs are not
// modified.
function buildCondition(condCode, thenCode, elseCode) {
  const branchLengths = [thenCode.length, elseCode.length];

  return [...condCode, ...branchLengths, ...thenCode, ...elseCode];
}
// Lays out a loop as: |condCode|, then the length of the body, then
// |bodyCode|. Returns a new array; inputs are not modified.
function buildLoop(condCode, bodyCode) {
  return [...condCode, bodyCode.length, ...bodyCode];
}
// Builds a CALL instruction: the opcode, |functionIndex|, |delta|, the number
// of parameters, and then one parameter per entry of |env|, each computed as
// |sp| minus the value stored for that entry.
function buildCall(functionIndex, delta, env, sp) {
  const offsets = Object.values(env).map(position => sp - position);

  return [op.CALL, functionIndex, delta, offsets.length, ...offsets];
}
// Builds the code for a predicate over |expression|.
//
// Layout: PUSH_CURR_POS, then the expression's code wrapped in
// SILENT_FAILS_ON / SILENT_FAILS_OFF, then a conditional. The conditional
// tests IF_ERROR when |negative| is true and IF_NOT_ERROR otherwise; its first
// branch ends in PUSH_UNDEFINED and its second in PUSH_FAILED. |negative| also
// swaps which branch uses POP_CURR_POS and which uses a plain POP.
//
// The expression is generated with a copy of |context.env|, no action, and a
// stack pointer one higher than |context.sp| (presumably to account for the
// value pushed by PUSH_CURR_POS).
function buildSimplePredicate(expression, negative, context) {
  const expressionCode = generate(expression, {
    sp: context.sp + 1,
    env: cloneEnv(context.env),
    action: null
  });

  const test = [negative ? op.IF_ERROR : op.IF_NOT_ERROR];
  const thenCode = buildSequence(
    [op.POP],
    [negative ? op.POP : op.POP_CURR_POS],
    [op.PUSH_UNDEFINED]
  );
  const elseCode = buildSequence(
    [op.POP],
    [negative ? op.POP_CURR_POS : op.POP],
    [op.PUSH_FAILED]
  );

  return buildSequence(
    [op.PUSH_CURR_POS],
    [op.SILENT_FAILS_ON],
    expressionCode,
    [op.SILENT_FAILS_OFF],
    buildCondition(test, thenCode, elseCode)
  );
}
// Builds the code for a predicate implemented by user code.
//
// |code| is registered as a function constant whose parameters are the keys of
// |context.env|. The emitted layout is: UPDATE_SAVED_POS, a call to that
// function (built by |buildCall| with delta 0), then an IF conditional. Both
// branches start with POP; the first then pushes PUSH_UNDEFINED and the second
// PUSH_FAILED, and |negative| swaps those two pushes.
function buildSemanticPredicate(code, negative, context) {
  const functionIndex = addFunctionConst(Object.keys(context.env), code);

  const thenPush = negative ? op.PUSH_FAILED : op.PUSH_UNDEFINED;
  const elsePush = negative ? op.PUSH_UNDEFINED : op.PUSH_FAILED;

  return buildSequence(
    [op.UPDATE_SAVED_POS],
    buildCall(functionIndex, 0, context.env, context.sp),
    buildCondition(
      [op.IF],
      buildSequence([op.POP], [thenPush]),
      buildSequence([op.POP], [elsePush])
    )
  );
}
// Builds a WHILE_NOT_ERROR loop whose body is APPEND followed by
// |expressionCode|.
function buildAppendLoop(expressionCode) {
  const loopCondition = [op.WHILE_NOT_ERROR];
  const loopBody = buildSequence([op.APPEND], expressionCode);

  return buildLoop(loopCondition, loopBody);
}
let generate = visitor.build({
grammar(node) {
node.rules.forEach(generate);
node.consts = consts;
},
rule(node) {
node.bytecode = generate(node.expression, {
sp: -1, // stack pointer
env: { }, // mapping of label names to stack positions
action: null // action nodes pass themselves to children here
});
},
named(node, context) {
let nameIndex = addConst(
"peg$otherExpectation(\"" + js.stringEscape(node.name) + "\")"
);
// The code generated below is slightly suboptimal because |FAIL| pushes
// to the stack, so we need to stick a |POP| in front of it. We lack a
// dedicated instruction that would just report the failure and not touch
// the stack.
return buildSequence(
[op.SILENT_FAILS_ON],
generate(node.expression, context),
[op.SILENT_FAILS_OFF],
buildCondition([op.IF_ERROR], [op.FAIL, nameIndex], [])
);
},
choice(node, context) {
function buildAlternativesCode(alternatives, context) {
return buildSequence(
generate(alternatives[0], {
sp: context.sp,
env: cloneEnv(context.env),
action: null
}),
alternatives.length > 1
? buildCondition(
[op.IF_ERROR],
buildSequence(
[op.POP],
buildAlternativesCode(alternatives.slice(1), context)
),
[]
)
: []
);
}
return buildAlternativesCode(node.alternatives, context);
},
action(node, context) {
let env = cloneEnv(context.env);
let emitCall = node.expression.type !== "sequence"
let consts = [];
function