#!/usr/bin/env node
var util = require("util");
var fs = require("fs");
var path = require("path");
var PEG = require("../lib/peg");

/* Helpers */

function printVersion() {
  util.puts("PEG.js " + PEG.VERSION);
}

function printHelp() {
  util.puts("Usage: pegjs [options] [--] [<input_file>] [<output_file>]");
  util.puts("");
  util.puts("Generates a parser from the PEG grammar specified in the <input_file> and writes");
  util.puts("it to the <output_file>.");
  util.puts("");
  util.puts("If the <output_file> is omitted, its name is generated by changing the");
  util.puts("<input_file> extension to \".js\". If both <input_file> and <output_file> are");
  util.puts("omitted, standard input and output are used.");
  util.puts("");
  util.puts("Options:");
  util.puts("  -e, --export-var <variable>        name of the variable where the parser");
  util.puts("                                     object will be stored (default:");
  util.puts("                                     \"module.exports\")");
  util.puts("      --cache                        make generated parser cache results");
  util.puts("      --allowed-start-rules <rules>  comma-separated list of rules the generated");
  util.puts("                                     parser will be allowed to start parsing");
  util.puts("                                     from (default: the first rule in the");
  util.puts("                                     grammar)");
  util.puts("  -o, --optimize <goal>              select optimization for speed or size");
  util.puts("                                     (default: speed)");
util.puts(" --trace enable tracing in generated parser");
util.puts(" --plugin <plugin> use a specified plugin (can be specified");
util.puts(" multiple times)");
util.puts(" --extra-options <options> additional options (in JSON format) to pass");
util.puts(" to PEG.buildParser");
util.puts(" --extra-options-file <file> file with additional options (in JSON");
util.puts(" format) to pass to PEG.buildParser");
util.puts(" -v, --version print version information and exit");
util.puts(" -h, --help print help and exit");
}
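
/*
 * Illustrative invocations (file names are examples only):
 *
 *   pegjs arithmetics.pegjs                         # writes arithmetics.js
 *   pegjs --cache -o size grammar.pegjs parser.js   # explicit output file
 *   cat grammar.pegjs | pegjs > parser.js           # stdin/stdout mode
 */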

function exitSuccess() {
  process.exit(0);
}

function exitFailure() {
  process.exit(1);
}

function abort(message) {
  util.error(message);
  exitFailure();
}

function addExtraOptions(options, json) {
  var extraOptions;

  try {
    extraOptions = JSON.parse(json);
  } catch (e) {
    if (!(e instanceof SyntaxError)) { throw e; }

    abort("Error parsing JSON: " + e.message);
  }

  if (typeof extraOptions !== "object") {
    abort("The JSON with extra options has to represent an object.");
  }

  for (var key in extraOptions) {
    if (extraOptions.hasOwnProperty(key)) {
      options[key] = extraOptions[key];
    }
  }
}
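
/*
 * Illustrative use of addExtraOptions via the --extra-options switch (the
 * option values here are made up):
 *
 *   --extra-options '{"cache": true, "allowedStartRules": ["start", "expr"]}'
 *
 * parses the JSON and copies each key onto the options object, overriding
 * whatever the corresponding command-line switch may have set so far.
 */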

/*
 * Extracted into a function just to silence JSHint complaining about creating
 * functions in a loop.
 */
function trim(s) {
  return s.trim();
}

/* Arguments */

var args = process.argv.slice(2); // Trim "node" and the script path.

function isOption(arg) {
  return (/^-/).test(arg);
}

function nextArg() {
  args.shift();
}

/* Files */

function readStream(inputStream, callback) {
  var input = "";

  inputStream.on("data", function(data) { input += data; });
  inputStream.on("end", function() { callback(input); });
}

/* Main */

/* This makes the generated parser a CommonJS module by default. */
var exportVar = "module.exports";

var options = {
  cache: false,
  output: "source",
  optimize: "speed",
  trace: false,
  plugins: []
};
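
/*
 * For reference, these defaults correspond to an API call along the lines of
 * (illustrative only; grammarText stands for the grammar read further below):
 *
 *   var source = PEG.buildParser(grammarText, {
 *     output:   "source",
 *     optimize: "speed"
 *   });
 *
 * The command-line switches handled below adjust these fields before the
 * grammar is compiled at the end of this script.
 */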

/* Parse the command line. The loop is labeled so that "--" can end option
   processing early (see the "--" case below). */
optionLoop:
while (args.length > 0 && isOption(args[0])) {
  switch (args[0]) {
case "-e":
case "--export-var":
nextArg();
if (args.length === 0) {
abort("Missing parameter of the -e/--export-var option.");
}
exportVar = args[0];
break;
case "--cache":
options.cache = true;
break;
case "--allowed-start-rules":
nextArg();
if (args.length === 0) {
abort("Missing parameter of the -e/--allowed-start-rules option.");
}
options.allowedStartRules = args[0]
.split(",")
.map(trim);
break;
case "--trace":
options.trace = true;
break;
case "-o":
case "--optimize":
nextArg();
if (args.length === 0) {
abort("Missing parameter of the -o/--optimize option.");
}
if (args[0] !== "speed" && args[0] !== "size") {
abort("Optimization goal must be either \"speed\" or \"size\".");
}
options.optimize = args[0];
break;
case "--plugin":
nextArg();
if (args.length === 0) {
abort("Missing parameter of the --plugin option.");
}
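      /* Relative paths ("./..." or "../...") are resolved against the current
         working directory so that local plugin files can be used; any other
         value is require()d by name as an installed module. */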
      var id = /^(\.\/|\.\.\/)/.test(args[0]) ? path.resolve(args[0]) : args[0];
      var mod;
      try {
        mod = require(id);
      } catch (e) {
        if (e.code !== "MODULE_NOT_FOUND") { throw e; }

        abort("Can't load module \"" + id + "\".");
      }
      options.plugins.push(mod);
      break;

    case "--extra-options":
      nextArg();
      if (args.length === 0) {
        abort("Missing parameter of the --extra-options option.");
      }
      addExtraOptions(options, args[0]);
      break;

    case "--extra-options-file":
      nextArg();
      if (args.length === 0) {
        abort("Missing parameter of the --extra-options-file option.");
      }
      try {
        var json = fs.readFileSync(args[0]);
      } catch (e) {
        abort("Can't read from file \"" + args[0] + "\".");
      }
      addExtraOptions(options, json);
      break;

    case "-v":
    case "--version":
      printVersion();
      exitSuccess();
      break;

    case "-h":
    case "--help":
      printHelp();
      exitSuccess();
      break;
case "--":
nextArg();
break;

    default:
      abort("Unknown option: " + args[0] + ".");
  }

  nextArg();
}

switch (args.length) {
  case 0:
    process.stdin.resume();
    var inputStream = process.stdin;
    var outputStream = process.stdout;
    break;

  case 1:
  case 2:
    var inputFile = args[0];
    var inputStream = fs.createReadStream(inputFile);
    inputStream.on("error", function() {
      abort("Can't read from file \"" + inputFile + "\".");
    });

    var outputFile = args.length === 1
      ? args[0].replace(/\.[^.]*$/, ".js")
      : args[1];
    var outputStream = fs.createWriteStream(outputFile);
    outputStream.on("error", function() {
      abort("Can't write to file \"" + outputFile + "\".");
    });
    break;

  default:
    abort("Too many arguments.");
}

readStream(inputStream, function(input) {
  var source;

  try {
    source = PEG.buildParser(input, options);
  } catch (e) {
    if (e.line !== undefined && e.column !== undefined) {
      abort(e.line + ":" + e.column + ": " + e.message);
    } else {
      abort(e.message);
    }
  }

  outputStream.write(
    exportVar !== "" ? exportVar + " = " + source + ";\n" : source + "\n"
  );

  if (outputStream !== process.stdout) {
    outputStream.end();
  }
});