pegjs/bin/pegjs

#!/usr/bin/env node

var util = require("util");
var fs   = require("fs");
var PEG  = require("../lib/peg");

/* Helpers */

/*
 * Prints the version banner ("PEG.js " followed by PEG.VERSION) as one line on
 * standard output.
 *
 * console.log is used because util.puts is deprecated and no longer exists in
 * current Node.js releases.
 */
function printVersion() {
  console.log("PEG.js " + PEG.VERSION);
}

/*
 * Prints the command-line usage summary and the list of supported options to
 * standard output.
 *
 * The text is kept as an array with one entry per output line and written with
 * console.log, because util.puts is deprecated and no longer exists in current
 * Node.js releases.
 */
function printHelp() {
  var helpLines = [
    "Usage: pegjs [options] [--] [<input_file>] [<output_file>]",
    "",
    "Generates a parser from the PEG grammar specified in the <input_file> and",
    "writes it to the <output_file>.",
    "",
    "If the <output_file> is omitted, its name is generated by changing the",
    "<input_file> extension to \".js\". If both <input_file> and <output_file> are",
    "omitted, standard input and output are used.",
    "",
    "Options:",
    "  -e, --export-var <variable>        name of the variable where the parser",
    "                                     object will be stored (default:",
    "                                     \"module.exports\")",
    "      --cache                        make generated parser cache results",
    "      --allowed-start-rules <rules>  comma-separated list of rules the generated",
    "                                     parser will be allowed to start parsing",
    "                                     from (default: the first rule in the",
    "                                     grammar)",
    "  -o, --optimize <goal>              select optimization for speed or size (default:",
    "                                     speed)",
    "  -v, --version                      print version information and exit",
    "  -h, --help                         print help and exit"
  ];

  console.log(helpLines.join("\n"));
}

/* Ends the process, reporting success (exit status 0). */
function exitSuccess() {
  var successStatus = 0;
  process.exit(successStatus);
}

/* Ends the process, reporting failure (exit status 1). */
function exitFailure() {
  var failureStatus = 1;
  process.exit(failureStatus);
}

/*
 * Reports a fatal error and terminates the process with a failure status.
 *
 * The message is written as one line to standard error and then exitFailure()
 * is called. console.error is used because util.error is deprecated and no
 * longer exists in current Node.js releases.
 */
function abort(message) {
  console.error(message);
  exitFailure();
}

/* Arguments */

// Arguments still waiting to be processed: process.argv without its first two
// entries ("node" and the script path).
var args = process.argv.filter(function(_, position) { return position >= 2; });

/* Tells whether an argument looks like an option, i.e. begins with "-". */
function isOption(arg) {
  var firstChar = String(arg).charAt(0);
  return firstChar === "-";
}

/* Discards the first pending entry of |args|; does nothing when it is empty. */
function nextArg() {
  args.splice(0, 1);
}

/* Files */

/*
 * Reads |inputStream| until its "end" event and then calls |callback| with
 * everything that was read, as a single string.
 *
 * Chunks are collected first and decoded only once at the end. Decoding each
 * Buffer chunk separately (as string concatenation would do) corrupts any
 * multi-byte UTF-8 character that happens to be split across two chunks.
 * If the stream delivers strings (or a mix), the chunks are simply joined.
 *
 * Stream errors are not handled here; callers attach their own "error"
 * listeners.
 */
function readStream(inputStream, callback) {
  var chunks = [];

  inputStream.on("data", function(chunk) { chunks.push(chunk); });

  inputStream.on("end", function() {
    var allBuffers = chunks.every(function(chunk) {
      return Buffer.isBuffer(chunk);
    });

    callback(allBuffers
      ? Buffer.concat(chunks).toString("utf8")
      : chunks.join(""));
  });
}

/* Main */

/*
 * Variable the generated parser gets assigned to. With this default the
 * generated file is a CommonJS module.
 */
var exportVar = "module.exports";

/* Default settings passed to PEG.buildParser. */
var options = {};
options.cache = false;
options.output = "source";
options.optimize = "speed";

/*
 * Consume the leading options from |args|, updating |exportVar| and |options|.
 *
 * Parsing stops at the first argument that does not look like an option, or
 * right after a "--" separator. Whatever remains in |args| afterwards is
 * treated as positional arguments (input and output file).
 *
 * Every option that takes a parameter calls nextArg() once to move to that
 * parameter; the nextArg() call after the switch then removes the last
 * argument the current iteration handled.
 */
var optionsEnded = false;

while (!optionsEnded && args.length > 0 && isOption(args[0])) {
  switch (args[0]) {
    case "-e":
    case "--export-var":
      nextArg();
      if (args.length === 0) {
        abort("Missing parameter of the -e/--export-var option.");
      }
      exportVar = args[0];
      break;

    case "--cache":
      options.cache = true;
      break;

    case "--allowed-start-rules":
      nextArg();
      if (args.length === 0) {
        /* This option has no short form, so none is named in the message. */
        abort("Missing parameter of the --allowed-start-rules option.");
      }
      /* "a, b,c" -> ["a", "b", "c"] */
      options.allowedStartRules = args[0]
        .split(",")
        .map(function(s) { return s.trim(); });
      break;

    case "-o":
    case "--optimize":
      nextArg();
      if (args.length === 0) {
        abort("Missing parameter of the -o/--optimize option.");
      }
      if (args[0] !== "speed" && args[0] !== "size") {
        abort("Optimization goal must be either \"speed\" or \"size\".");
      }
      options.optimize = args[0];
      break;

    case "-v":
    case "--version":
      printVersion();
      exitSuccess();
      break;

    case "-h":
    case "--help":
      printHelp();
      exitSuccess();
      break;

    case "--":
      /*
       * Explicit end of options. Only the flag is set here: the nextArg() call
       * after the switch removes the "--" itself. Calling nextArg() here as
       * well would also discard the first positional argument, and without
       * the flag a following argument starting with "-" would still be parsed
       * as an option.
       */
      optionsEnded = true;
      break;

    default:
      abort("Unknown option: " + args[0] + ".");
  }
  nextArg();
}

/*
 * Choose the input and output streams from the remaining positional arguments:
 *
 *   0 arguments -- standard input and standard output
 *   1 argument  -- the input file; the output file name is derived from it
 *   2 arguments -- the input file and the output file
 *
 * Any other number of arguments is rejected.
 */
switch (args.length) {
  case 0:
    process.stdin.resume();
    var inputStream = process.stdin;
    var outputStream = process.stdout;
    break;

  case 1:
  case 2:
    var inputFile = args[0];
    var inputStream = fs.createReadStream(inputFile);
    inputStream.on("error", function() {
      abort("Can't read from file \"" + inputFile + "\".");
    });

    /*
     * Derive the output name by replacing the extension of the last path
     * component with ".js", or by appending ".js" when there is no extension.
     * The extension may not contain a path separator, so dots in directory
     * names (e.g. "../grammar" or "v1.2/grammar") are left alone. Without
     * this, an extensionless input would map to itself and the grammar file
     * would be truncated when the output stream is opened.
     */
    var outputFile = args.length === 1
      ? inputFile.replace(/(\.[^.\/\\]*)?$/, ".js")
      : args[1];
    var outputStream = fs.createWriteStream(outputFile);
    outputStream.on("error", function() {
      abort("Can't write to file \"" + outputFile + "\".");
    });

    break;

  default:
    abort("Too many arguments.");
}

/*
 * Read the whole grammar, build the parser source from it and write
 * "<exportVar> = <source>;" to the output stream. A failure in
 * PEG.buildParser aborts with its message, prefixed by "line:column: " when
 * the error carries both |line| and |column|.
 */
readStream(inputStream, function(grammar) {
  var parserSource;

  try {
    parserSource = PEG.buildParser(grammar, options);
  } catch (error) {
    var hasPosition = error.line !== undefined && error.column !== undefined;

    abort(hasPosition
      ? error.line + ":" + error.column + ": " + error.message
      : error.message);
  }

  outputStream.write(exportVar + " = " + parserSource + ";\n");

  /* Standard output is left open; only a file stream is ended. */
  if (outputStream === process.stdout) {
    return;
  }
  outputStream.end();
});
Switch command-line mode backend from Rhino to Node 13 years ago			`#!/usr/bin/env node`
Initial commit. 14 years ago
Use \|util\| module instead of \|sys\| \|sys\| emits a warning in Node.js 0.6.x. 13 years ago			`var util = require("util");`
			`var fs = require("fs");`
			`var PEG = require("../lib/peg");`
Make bin/pegjs work when called via a symlink Similar issue exists on Windows too (they have symlinks since Vista), but I could not find how to dereference symlinks from batch files, so I did not fix it. I guess this does not matter much given how little the symlinks are used in the Windows world. Closes #1. 14 years ago
Small style fixes 13 years ago			`/* Helpers */`
Switch command-line mode backend from Rhino to Node 13 years ago
			`function printVersion() {`
Use \|util\| module instead of \|sys\| \|sys\| emits a warning in Node.js 0.6.x. 13 years ago			`util.puts("PEG.js " + PEG.VERSION);`
Switch command-line mode backend from Rhino to Node 13 years ago			`}`

			`function printHelp() {`
Use \|util\| module instead of \|sys\| \|sys\| emits a warning in Node.js 0.6.x. 13 years ago			`util.puts("Usage: pegjs [options] [--] [<input_file>] [<output_file>]");`
			`util.puts("");`
			`util.puts("Generates a parser from the PEG grammar specified in the <input_file> and");`
			`util.puts("writes it to the <output_file>.");`
			`util.puts("");`
			`util.puts("If the <output_file> is omitted, its name is generated by changing the");`
			`util.puts("<input_file> extension to \".js\". If both <input_file> and <output_file> are");`
			`util.puts("omitted, standard input and output are used.");`
			`util.puts("");`
			`util.puts("Options:");`
Allowed start rules must be specified explicitly Before this commit, generated parsers were able to start parsing from any rule. This was nice, but it made rule code inlining impossible. Since this commit, the list of allowed start rules has to be specified explicitly using the \|allowedStartRules\| option of the \|PEG.buildParser\| method (or the --allowed-start-rules option on the command-line). These rules will be excluded from inlining when it's implemented. 12 years ago			`util.puts(" -e, --export-var <variable> name of the variable where the parser");`
			`util.puts(" object will be stored (default:");`
			`util.puts(" \"module.exports\")");`
			`util.puts(" --cache make generated parser cache results");`
			`util.puts(" --allowed-start-rules <rules> comma-separated list of rules the generated");`
			`util.puts(" parser will be allowed to start parsing");`
			`util.puts(" from (default: the first rule in the");`
			`util.puts(" grammar)");`
Code generator rewrite This is a complete rewrite of the PEG.js code generator. Its goals are: 1. Allow optimizing the generated parser code for code size as well as for parsing speed. 2. Prepare ground for future optimizations and big features (like incremental parsing). 2. Replace the old template-based code-generation system with something more lightweight and flexible. 4. General code cleanup (structure, style, variable names, ...). New Architecture ---------------- The new code generator consists of two steps: * Bytecode generator -- produces bytecode for an abstract virtual machine * JavaScript generator -- produces JavaScript code based on the bytecode The abstract virtual machine is stack-based. Originally I wanted to make it register-based, but it turned out that all the code related to it would be more complex and the bytecode itself would be longer (because of explicit register specifications in instructions). The only downsides of the stack-based approach seem to be few small inefficiencies (see e.g. the \|NIP\| instruction), which seem to be insignificant. The new generator allows optimizing for parsing speed or code size (you can choose using the \|optimize\| option of the \|PEG.buildParser\| method or the --optimize/-o option on the command-line). When optimizing for size, the JavaScript generator emits the bytecode together with its constant table and a generic bytecode interpreter. Because the interpreter is small and the bytecode and constant table grow only slowly with size of the grammar, the resulting parser is also small. When optimizing for speed, the JavaScript generator just compiles the bytecode into JavaScript. The generated code is relatively efficient, so the resulting parser is fast. Internal Identifiers -------------------- As a small bonus, all internal identifiers visible to user code in the initializer, actions and predicates are prefixed by \|peg$\|. 
This lowers the chance that identifiers in user code will conflict with the ones from PEG.js. It also makes using any internals in user code ugly, which is a good thing. This solves GH-92. Performance ----------- The new code generator improved parsing speed and parser code size significantly. The generated parsers are now: * 39% faster when optimizing for speed * 69% smaller when optimizing for size (without minification) * 31% smaller when optimizing for size (with minification) (Parsing speed was measured using the \|benchmark/run\| script. Code size was measured by generating parsers for examples in the \|examples\| directory and adding up the file sizes. Minification was done by \|uglify --ascii\| in version 1.3.4.) Final Note ---------- This is just a beginning! The new code generator lays a foundation upon which many optimizations and improvements can (and will) be made. Stay tuned :-) 12 years ago			`util.puts(" -o, --optimize <goal> select optimization for speed or size (default:");`
			`util.puts(" speed)");`
Allowed start rules must be specified explicitly Before this commit, generated parsers were able to start parsing from any rule. This was nice, but it made rule code inlining impossible. Since this commit, the list of allowed start rules has to be specified explicitly using the \|allowedStartRules\| option of the \|PEG.buildParser\| method (or the --allowed-start-rules option on the command-line). These rules will be excluded from inlining when it's implemented. 12 years ago			`util.puts(" -v, --version print version information and exit");`
			`util.puts(" -h, --help print help and exit");`
Switch command-line mode backend from Rhino to Node 13 years ago			`}`

			`function exitSuccess() {`
			`process.exit(0);`
			`}`

			`function exitFailure() {`
			`process.exit(1);`
			`}`

			`function abort(message) {`
Use \|util\| module instead of \|sys\| \|sys\| emits a warning in Node.js 0.6.x. 13 years ago			`util.error(message);`
Switch command-line mode backend from Rhino to Node 13 years ago			`exitFailure();`
			`}`

Small style fixes 13 years ago			`/* Arguments */`
Switch command-line mode backend from Rhino to Node 13 years ago
			`var args = process.argv.slice(2); // Trim "node" and the script path.`

			`function isOption(arg) {`
Fix regexp for detecting command-line options in /bin/pegjs Closes GH-51. 13 years ago			`return /^-/.test(arg);`
Switch command-line mode backend from Rhino to Node 13 years ago			`}`

			`function nextArg() {`
			`args.shift();`
			`}`

Small style fixes 13 years ago			`/* Files */`
Switch command-line mode backend from Rhino to Node 13 years ago
			`function readStream(inputStream, callback) {`
			`var input = "";`
			`inputStream.on("data", function(data) { input += data; });`
			`inputStream.on("end", function() { callback(input); });`
			`}`

Small style fixes 13 years ago			`/* Main */`
Switch command-line mode backend from Rhino to Node 13 years ago
			`/* This makes the generated parser a CommonJS module by default. */`
bin/pegjs: Default parser variable name is "module.exports" The previous default name was "exports.parser". This meant that to use the generated parser in Node.js, you had to use code like this: var parser = require("./my-cool-parser").parser; parser.parse(...); Now you can shorten it a bit: var parser = require("./my-cool-parser"); parser.parse(...); The shorter version makes sense since no other objects except the parser are exported from the module. 13 years ago			`var exportVar = "module.exports";`
Implement the "--cache" command-line option 12 years ago			`var options = {`
Code generator rewrite This is a complete rewrite of the PEG.js code generator. Its goals are: 1. Allow optimizing the generated parser code for code size as well as for parsing speed. 2. Prepare ground for future optimizations and big features (like incremental parsing). 2. Replace the old template-based code-generation system with something more lightweight and flexible. 4. General code cleanup (structure, style, variable names, ...). New Architecture ---------------- The new code generator consists of two steps: * Bytecode generator -- produces bytecode for an abstract virtual machine * JavaScript generator -- produces JavaScript code based on the bytecode The abstract virtual machine is stack-based. Originally I wanted to make it register-based, but it turned out that all the code related to it would be more complex and the bytecode itself would be longer (because of explicit register specifications in instructions). The only downsides of the stack-based approach seem to be few small inefficiencies (see e.g. the \|NIP\| instruction), which seem to be insignificant. The new generator allows optimizing for parsing speed or code size (you can choose using the \|optimize\| option of the \|PEG.buildParser\| method or the --optimize/-o option on the command-line). When optimizing for size, the JavaScript generator emits the bytecode together with its constant table and a generic bytecode interpreter. Because the interpreter is small and the bytecode and constant table grow only slowly with size of the grammar, the resulting parser is also small. When optimizing for speed, the JavaScript generator just compiles the bytecode into JavaScript. The generated code is relatively efficient, so the resulting parser is fast. Internal Identifiers -------------------- As a small bonus, all internal identifiers visible to user code in the initializer, actions and predicates are prefixed by \|peg$\|. 
This lowers the chance that identifiers in user code will conflict with the ones from PEG.js. It also makes using any internals in user code ugly, which is a good thing. This solves GH-92. Performance ----------- The new code generator improved parsing speed and parser code size significantly. The generated parsers are now: * 39% faster when optimizing for speed * 69% smaller when optimizing for size (without minification) * 31% smaller when optimizing for size (with minification) (Parsing speed was measured using the \|benchmark/run\| script. Code size was measured by generating parsers for examples in the \|examples\| directory and adding up the file sizes. Minification was done by \|uglify --ascii\| in version 1.3.4.) Final Note ---------- This is just a beginning! The new code generator lays a foundation upon which many optimizations and improvements can (and will) be made. Stay tuned :-) 12 years ago			`cache: false,`
			`output: "source",`
			`optimize: "speed"`
Implement the "--cache" command-line option 12 years ago			`};`
Switch command-line mode backend from Rhino to Node 13 years ago
			`while (args.length > 0 && isOption(args[0])) {`
			`switch (args[0]) {`
			`case "-e":`
			`case "--export-var":`
			`nextArg();`
Add check for missing parameter of the -e/--export-var option. 13 years ago			`if (args.length === 0) {`
			`abort("Missing parameter of the -e/--export-var option.");`
			`}`
Switch command-line mode backend from Rhino to Node 13 years ago			`exportVar = args[0];`
			`break;`

Implement the "--cache" command-line option 12 years ago			`case "--cache":`
			`options.cache = true;`
			`break;`

Allowed start rules must be specified explicitly Before this commit, generated parsers were able to start parsing from any rule. This was nice, but it made rule code inlining impossible. Since this commit, the list of allowed start rules has to be specified explicitly using the \|allowedStartRules\| option of the \|PEG.buildParser\| method (or the --allowed-start-rules option on the command-line). These rules will be excluded from inlining when it's implemented. 12 years ago			`case "--allowed-start-rules":`
			`nextArg();`
			`if (args.length === 0) {`
			`abort("Missing parameter of the -e/--allowed-start-rules option.");`
			`}`
			`options.allowedStartRules = args[0]`
			`.split(",")`
			`.map(function(s) { return s.trim() });`
			`break;`

Code generator rewrite This is a complete rewrite of the PEG.js code generator. Its goals are: 1. Allow optimizing the generated parser code for code size as well as for parsing speed. 2. Prepare ground for future optimizations and big features (like incremental parsing). 2. Replace the old template-based code-generation system with something more lightweight and flexible. 4. General code cleanup (structure, style, variable names, ...). New Architecture ---------------- The new code generator consists of two steps: * Bytecode generator -- produces bytecode for an abstract virtual machine * JavaScript generator -- produces JavaScript code based on the bytecode The abstract virtual machine is stack-based. Originally I wanted to make it register-based, but it turned out that all the code related to it would be more complex and the bytecode itself would be longer (because of explicit register specifications in instructions). The only downsides of the stack-based approach seem to be few small inefficiencies (see e.g. the \|NIP\| instruction), which seem to be insignificant. The new generator allows optimizing for parsing speed or code size (you can choose using the \|optimize\| option of the \|PEG.buildParser\| method or the --optimize/-o option on the command-line). When optimizing for size, the JavaScript generator emits the bytecode together with its constant table and a generic bytecode interpreter. Because the interpreter is small and the bytecode and constant table grow only slowly with size of the grammar, the resulting parser is also small. When optimizing for speed, the JavaScript generator just compiles the bytecode into JavaScript. The generated code is relatively efficient, so the resulting parser is fast. Internal Identifiers -------------------- As a small bonus, all internal identifiers visible to user code in the initializer, actions and predicates are prefixed by \|peg$\|. 
This lowers the chance that identifiers in user code will conflict with the ones from PEG.js. It also makes using any internals in user code ugly, which is a good thing. This solves GH-92. Performance ----------- The new code generator improved parsing speed and parser code size significantly. The generated parsers are now: * 39% faster when optimizing for speed * 69% smaller when optimizing for size (without minification) * 31% smaller when optimizing for size (with minification) (Parsing speed was measured using the \|benchmark/run\| script. Code size was measured by generating parsers for examples in the \|examples\| directory and adding up the file sizes. Minification was done by \|uglify --ascii\| in version 1.3.4.) Final Note ---------- This is just a beginning! The new code generator lays a foundation upon which many optimizations and improvements can (and will) be made. Stay tuned :-) 12 years ago			`case "-o":`
			`case "--optimize":`
			`nextArg();`
			`if (args.length === 0) {`
			`abort("Missing parameter of the -o/--optimize option.");`
			`}`
			`if (args[0] !== "speed" && args[0] !== "size") {`
			`abort("Optimization goal must be either \"speed\" or \"size\".");`
			`}`
			`options.optimize = args[0];`
			`break;`

Switch command-line mode backend from Rhino to Node 13 years ago			`case "-v":`
			`case "--version":`
			`printVersion();`
			`exitSuccess();`
			`break;`

			`case "-h":`
			`case "--help":`
			`printHelp();`
			`exitSuccess();`
			`break;`

			`case "--":`
			`nextArg();`
			`break;`

			`default:`
			`abort("Unknown option: " + args[0] + ".");`
			`}`
			`nextArg();`
			`}`

			`switch (args.length) {`
			`case 0:`
/bin/pegjs: Avoid calling \|process.openStdin\| While \|process.openStdin\| is not officially deprecated, it's no longer documented and just using \|process.stdin\| and resuming it seems to be the official way. 12 years ago			`process.stdin.resume();`
			`var inputStream = process.stdin;`
Switch command-line mode backend from Rhino to Node 13 years ago			`var outputStream = process.stdout;`
			`break;`
Nicer messages in command-line mode on read/write errors 13 years ago
Switch command-line mode backend from Rhino to Node 13 years ago			`case 1:`
			`case 2:`
Nicer messages in command-line mode on read/write errors 13 years ago			`var inputFile = args[0];`
			`var inputStream = fs.createReadStream(inputFile);`
			`inputStream.on("error", function() {`
			`abort("Can't read from file \"" + inputFile + "\".");`
			`});`

			`var outputFile = args.length == 1`
			`? args[0].replace(/\.[^.]*$/, ".js")`
			`: args[1];`
			`var outputStream = fs.createWriteStream(outputFile);`
			`outputStream.on("error", function() {`
			`abort("Can't write to file \"" + outputFile + "\".");`
			`});`

Switch command-line mode backend from Rhino to Node 13 years ago			`break;`
Nicer messages in command-line mode on read/write errors 13 years ago
Switch command-line mode backend from Rhino to Node 13 years ago			`default:`
			`abort("Too many arguments.");`
			`}`

			`readStream(inputStream, function(input) {`
			`try {`
Kill the \|toSource\| method, introduce the \|output\| option Before this commit, \|PEG.buildParser\| always returned a parser object. The only way to get its source code was to call the \|toSource\| method on it. While this method worked for parsers produced by \|PEG.buildParser\| directly, it didn't work for parsers instantiated by executing their source code. In other words, it was unreliable. This commit removes the \|toSource\| method on generated parsers and introduces a new \|output\| option to \|PEG.buildParser\|. It allows callers to specify whether they want to get back the parser object (\|options.output === "parser"\|) or its source code (\|options.output === "source"\|). This is a much better and more reliable API. 12 years ago			`var source = PEG.buildParser(input, options);`
Switch command-line mode backend from Rhino to Node 13 years ago			`} catch (e) {`
			`if (e.line !== undefined && e.column !== undefined) {`
			`abort(e.line + ":" + e.column + ": " + e.message);`
			`} else {`
			`abort(e.message);`
			`}`
			`}`

Kill the \|toSource\| method, introduce the \|output\| option Before this commit, \|PEG.buildParser\| always returned a parser object. The only way to get its source code was to call the \|toSource\| method on it. While this method worked for parsers produced by \|PEG.buildParser\| directly, it didn't work for parsers instantiated by executing their source code. In other words, it was unreliable. This commit removes the \|toSource\| method on generated parsers and introduces a new \|output\| option to \|PEG.buildParser\|. It allows callers to specify whether they want to get back the parser object (\|options.output === "parser"\|) or its source code (\|options.output === "source"\|). This is a much better and more reliable API. 12 years ago			`outputStream.write(exportVar + " = " + source + ";\n");`
/bin/pegjs: Don't close standard output Avoids "Error: process.stdout cannot be closed" error when invoked without file arguments. 12 years ago			`if (outputStream !== process.stdout) {`
			`outputStream.end();`
			`}`
Switch command-line mode backend from Rhino to Node 13 years ago			`});`