Git repo npmization: Compose PEG.js from Node.js modules

PEG.js source code becomes a set of Node.js modules that include each
other as needed. The distribution version is built by bundling these
modules together, wrapping them inside a bit of boilerplate code that
makes |module.exports| and |require| work.

Part of a fix for GH-32.
redux
David Majda 12 years ago
parent c6cf129635
commit 4cda79951a

@ -2,6 +2,21 @@
PEGJS_VERSION = `cat $(VERSION_FILE)`
# ===== Modules =====
# Order matters -- dependencies must be listed before modules dependent on them.
MODULES = utils \
grammar-error \
parser \
compiler/passes/allocate-registers \
compiler/passes/generate-code \
compiler/passes/remove-proxy-rules \
compiler/passes/report-left-recursion \
compiler/passes/report-missing-rules \
compiler/passes \
compiler \
peg
# ===== Directories =====
SRC_DIR = src
@ -39,52 +54,64 @@ JASMINE_NODE = jasmine-node
PEGJS = $(BIN_DIR)/pegjs
BENCHMARK_RUN = $(BENCHMARK_DIR)/run
# ===== Preprocessor =====
# A simple preprocessor that recognizes two directives:
#
# @VERSION -- insert PEG.js version
# @include "<file>" -- include <file> here
#
# This could have been implemented many ways. I chose Perl because everyone will
# have it.
PREPROCESS=perl -e ' \
use strict; \
use warnings; \
\
use File::Basename; \
\
sub preprocess { \
my $$file = shift; \
my $$output = ""; \
\
open(my $$f, $$file) or die "Can\x27t open $$file: $$!"; \
while(<$$f>) { \
if (/^\s*\/\/\s*\@include\s*"([^"]*)"\s*$$/) { \
$$output .= preprocess(dirname($$file) . "/" . $$1); \
next; \
} \
\
$$output .= $$_; \
} \
close($$f); \
\
return $$output; \
} \
\
print preprocess($$ARGV[0]); \
'
# ===== Targets =====
# Generate the grammar parser
parser:
$(PEGJS) --export-var PEG.parser $(PARSER_SRC_FILE) $(PARSER_OUT_FILE)
$(PEGJS) $(PARSER_SRC_FILE) $(PARSER_OUT_FILE)
# Build the PEG.js library
build:
mkdir -p $(LIB_DIR)
$(PREPROCESS) $(PEGJS_SRC_FILE) > $(PEGJS_LIB_FILE)
rm -f $(PEGJS_LIB_FILE)
# The following code is inspired by CoffeeScript's Cakefile.
echo '/*' >> $(PEGJS_LIB_FILE)
echo " * PEG.js $(PEGJS_VERSION)" >> $(PEGJS_LIB_FILE)
echo ' *' >> $(PEGJS_LIB_FILE)
echo ' * http://pegjs.majda.cz/' >> $(PEGJS_LIB_FILE)
echo ' *' >> $(PEGJS_LIB_FILE)
echo ' * Copyright (c) 2010-2012 David Majda' >> $(PEGJS_LIB_FILE)
echo ' * Licensed under the MIT license' >> $(PEGJS_LIB_FILE)
echo ' */' >> $(PEGJS_LIB_FILE)
echo 'var PEG = (function(undefined) {' >> $(PEGJS_LIB_FILE)
echo ' var modules = {' >> $(PEGJS_LIB_FILE)
echo ' define: function(name, factory) {' >> $(PEGJS_LIB_FILE)
echo ' var dir = name.replace(/(^|\/)[^/]+$$/, "$$1"),' >> $(PEGJS_LIB_FILE)
echo ' module = { exports: {} };' >> $(PEGJS_LIB_FILE)
echo '' >> $(PEGJS_LIB_FILE)
echo ' function require(path) {' >> $(PEGJS_LIB_FILE)
echo ' var name = dir + path,' >> $(PEGJS_LIB_FILE)
echo ' regexp = /[^\/]+\/\.\.\/|\.\//;' >> $(PEGJS_LIB_FILE)
echo '' >> $(PEGJS_LIB_FILE)
echo " /* Can't use /.../g because we can move backwards in the string. */" >> $(PEGJS_LIB_FILE)
echo ' while (regexp.test(name)) {' >> $(PEGJS_LIB_FILE)
echo ' name = name.replace(regexp, "");' >> $(PEGJS_LIB_FILE)
echo ' }' >> $(PEGJS_LIB_FILE)
echo '' >> $(PEGJS_LIB_FILE)
echo ' return modules[name];' >> $(PEGJS_LIB_FILE)
echo ' }' >> $(PEGJS_LIB_FILE)
echo '' >> $(PEGJS_LIB_FILE)
echo ' factory(module, require);' >> $(PEGJS_LIB_FILE)
echo ' this[name] = module.exports;' >> $(PEGJS_LIB_FILE)
echo ' }' >> $(PEGJS_LIB_FILE)
echo ' };' >> $(PEGJS_LIB_FILE)
echo '' >> $(PEGJS_LIB_FILE)
for module in $(MODULES); do \
echo " modules.define(\"$$module\", function(module, require) {" >> $(PEGJS_LIB_FILE); \
sed -e 's/^/ /' src/$$module.js >> $(PEGJS_LIB_FILE); \
echo ' });' >> $(PEGJS_LIB_FILE); \
echo '' >> $(PEGJS_LIB_FILE); \
done
echo ' return modules["peg"]' >> $(PEGJS_LIB_FILE)
echo '})();' >> $(PEGJS_LIB_FILE)
echo '' >> $(PEGJS_LIB_FILE)
echo 'if (typeof module !== "undefined") {' >> $(PEGJS_LIB_FILE)
echo ' module.exports = PEG;' >> $(PEGJS_LIB_FILE)
echo '}' >> $(PEGJS_LIB_FILE)
# Remove built PEG.js library (created by "build")
clean:

@ -1,4 +1,8 @@
PEG.compiler = {
var utils = require("./utils");
module.exports = {
passes: require("./compiler/passes"),
/*
* Names of passes that will get run during the compilation (in the specified
* order).
@ -20,7 +24,7 @@ PEG.compiler = {
compile: function(ast, options) {
var that = this;
each(this.appliedPassNames, function(passName) {
utils.each(this.appliedPassNames, function(passName) {
that.passes[passName](ast, options);
});
@ -31,5 +35,3 @@ PEG.compiler = {
return result;
}
};
// @include "compiler/passes.js"

@ -5,10 +5,10 @@
* or modify it as needed. If the pass encounters a semantic error, it throws
* |PEG.GrammarError|.
*/
PEG.compiler.passes = {};
// @include "passes/report-missing-rules.js"
// @include "passes/report-left-recursion.js"
// @include "passes/remove-proxy-rules.js"
// @include "passes/allocate-registers.js"
// @include "passes/generate-code.js"
module.exports = {
reportMissingRules: require("./passes/report-missing-rules"),
reportLeftRecursion: require("./passes/report-left-recursion"),
removeProxyRules: require("./passes/remove-proxy-rules"),
allocateRegisters: require("./passes/allocate-registers"),
generateCode: require("./passes/generate-code")
};

@ -1,3 +1,5 @@
var utils = require("../../utils");
/*
* Allocates registers that the generated code for each node will use to store
* match results and parse positions. For "action", "semantic_and" and
@ -23,7 +25,7 @@
* point of action/predicate code execution to registers that will contain
* the labeled values.
*/
PEG.compiler.passes.allocateRegisters = function(ast) {
module.exports = function(ast) {
/*
* Register allocator that allocates registers from an unlimited
* integer-indexed pool. It allows allocating and releasing registers in any
@ -151,10 +153,10 @@ PEG.compiler.passes.allocateRegisters = function(ast) {
node.params = vars.buildParams();
}
var compute = buildNodeVisitor({
var compute = utils.buildNodeVisitor({
grammar:
function(node) {
each(node.rules, compute);
utils.each(node.rules, compute);
},
rule:
@ -172,7 +174,7 @@ PEG.compiler.passes.allocateRegisters = function(ast) {
choice:
function(node) {
each(node.alternatives, function(alternative) {
utils.each(node.alternatives, function(alternative) {
reuseResult(node, alternative);
scoped(function() {
compute(alternative);
@ -194,11 +196,11 @@ PEG.compiler.passes.allocateRegisters = function(ast) {
sequence:
function(node) {
savePos(node, function() {
each(node.elements, function(element) {
utils.each(node.elements, function(element) {
element.resultIndex = registers.alloc();
compute(element);
});
each(node.elements, function(element) {
utils.each(node.elements, function(element) {
registers.release(element.resultIndex);
});
});

@ -1,7 +1,9 @@
var utils = require("../../utils");
/* Generates the parser code. */
PEG.compiler.passes.generateCode = function(ast, options) {
options = clone(options) || {};
defaults(options, {
module.exports = function(ast, options) {
options = utils.clone(options) || {};
utils.defaults(options, {
cache: false,
trackLineAndColumn: false,
allowedStartRules: [ast.startRule]
@ -751,12 +753,12 @@ PEG.compiler.passes.generateCode = function(ast, options) {
})();
function fill(name, vars) {
vars.string = quote;
vars.range = range;
vars.map = map;
vars.pluck = pluck;
vars.keys = keys;
vars.values = values;
vars.string = utils.quote;
vars.range = utils.range;
vars.map = utils.map;
vars.pluck = utils.pluck;
vars.keys = utils.keys;
vars.values = utils.values;
vars.emit = emit;
vars.options = options;
@ -797,7 +799,7 @@ PEG.compiler.passes.generateCode = function(ast, options) {
return function(node) { return fill(name, { node: node }); };
}
var emit = buildNodeVisitor({
var emit = utils.buildNodeVisitor({
grammar: emitSimple("grammar"),
initializer: function(node) { return node.code; },
@ -878,12 +880,12 @@ PEG.compiler.passes.generateCode = function(ast, options) {
if (node.parts.length > 0) {
regexp = '/^['
+ (node.inverted ? '^' : '')
+ map(node.parts, function(part) {
+ utils.map(node.parts, function(part) {
return part instanceof Array
? quoteForRegexpClass(part[0])
? utils.quoteForRegexpClass(part[0])
+ '-'
+ quoteForRegexpClass(part[1])
: quoteForRegexpClass(part);
+ utils.quoteForRegexpClass(part[1])
: utils.quoteForRegexpClass(part);
}).join('')
+ ']/' + (node.ignoreCase ? 'i' : '');
} else {

@ -1,7 +1,9 @@
var utils = require("../../utils");
/*
* Removes proxy rules -- that is, rules that only delegate to another rule.
*/
PEG.compiler.passes.removeProxyRules = function(ast) {
module.exports = function(ast) {
function isProxyRule(node) {
return node.type === "rule" && node.expression.type === "rule_ref";
}
@ -15,13 +17,13 @@ PEG.compiler.passes.removeProxyRules = function(ast) {
function replaceInSubnodes(propertyName) {
return function(node, from, to) {
each(node[propertyName], function(subnode) {
utils.each(node[propertyName], function(subnode) {
replace(subnode, from, to);
});
};
}
var replace = buildNodeVisitor({
var replace = utils.buildNodeVisitor({
grammar: replaceInSubnodes("rules"),
rule: replaceInExpression,
named: replaceInExpression,
@ -54,7 +56,7 @@ PEG.compiler.passes.removeProxyRules = function(ast) {
var indices = [];
each(ast.rules, function(rule, i) {
utils.each(ast.rules, function(rule, i) {
if (isProxyRule(rule)) {
replaceRuleRefs(ast, rule.name, rule.expression.name);
if (rule.name === ast.startRule) {
@ -66,7 +68,7 @@ PEG.compiler.passes.removeProxyRules = function(ast) {
indices.reverse();
each(indices, function(index) {
utils.each(indices, function(index) {
ast.rules.splice(index, 1);
});
};

@ -1,5 +1,7 @@
var utils = require("../../utils");
/* Checks that no left recursion is present. */
PEG.compiler.passes.reportLeftRecursion = function(ast) {
module.exports = function(ast) {
function nop() {}
function checkExpression(node, appliedRules) {
@ -8,13 +10,13 @@ PEG.compiler.passes.reportLeftRecursion = function(ast) {
function checkSubnodes(propertyName) {
return function(node, appliedRules) {
each(node[propertyName], function(subnode) {
utils.each(node[propertyName], function(subnode) {
check(subnode, appliedRules);
});
};
}
var check = buildNodeVisitor({
var check = utils.buildNodeVisitor({
grammar: checkSubnodes("rules"),
rule:
@ -44,12 +46,12 @@ PEG.compiler.passes.reportLeftRecursion = function(ast) {
rule_ref:
function(node, appliedRules) {
if (contains(appliedRules, node.name)) {
if (utils.contains(appliedRules, node.name)) {
throw new PEG.GrammarError(
"Left recursion detected for rule \"" + node.name + "\"."
);
}
check(findRuleByName(ast, node.name), appliedRules);
check(utils.findRuleByName(ast, node.name), appliedRules);
},
literal: nop,

@ -1,14 +1,16 @@
var utils = require("../../utils");
/* Checks that all referenced rules exist. */
PEG.compiler.passes.reportMissingRules = function(ast) {
module.exports = function(ast) {
function nop() {}
function checkExpression(node) { check(node.expression); }
function checkSubnodes(propertyName) {
return function(node) { each(node[propertyName], check); };
return function(node) { utils.each(node[propertyName], check); };
}
var check = buildNodeVisitor({
var check = utils.buildNodeVisitor({
grammar: checkSubnodes("rules"),
rule: checkExpression,
named: checkExpression,
@ -26,7 +28,7 @@ PEG.compiler.passes.reportMissingRules = function(ast) {
rule_ref:
function(node) {
if (!findRuleByName(ast, node.name)) {
if (!utils.findRuleByName(ast, node.name)) {
throw new PEG.GrammarError(
"Referenced rule \"" + node.name + "\" does not exist."
);

@ -0,0 +1,9 @@
var utils = require("./utils");
/*
 * Thrown when the grammar contains an error.
 *
 * Constructor meant to be invoked with |new|. It stores the supplied
 * |message| on the created object and sets the object's |name| to
 * "GrammarError".
 */
module.exports = function(message) {
this.name = "GrammarError";
this.message = message;
};
/*
 * Hook the constructor up to |Error| through the shared |utils.subclass|
 * helper. This has to run after the assignment above because it receives the
 * exported constructor itself.
 */
utils.subclass(module.exports, Error);

@ -1,4 +1,4 @@
PEG.parser = (function(){
module.exports = (function(){
/*
* Generated by PEG.js 0.7.0.
*
@ -298,7 +298,7 @@ PEG.parser = (function(){
if (r0 !== null) {
r0 = (function(offset, head, tail) {
if (tail.length > 0) {
var alternatives = [head].concat(map(
var alternatives = [head].concat(utils.map(
tail,
function(element) { return element[1]; }
));
@ -1869,10 +1869,10 @@ PEG.parser = (function(){
}
if (r0 !== null) {
r0 = (function(offset, inverted, parts, flags) {
var partsConverted = map(parts, function(part) { return part.data; });
var partsConverted = utils.map(parts, function(part) { return part.data; });
var rawText = "["
+ inverted
+ map(parts, function(part) { return part.rawText; }).join("")
+ utils.map(parts, function(part) { return part.rawText; }).join("")
+ "]"
+ flags;
@ -1959,7 +1959,7 @@ PEG.parser = (function(){
return {
data: char_,
// FIXME: Get the raw text from the input directly.
rawText: quoteForRegexpClass(char_)
rawText: utils.quoteForRegexpClass(char_)
};
})(r1, r0);
}
@ -2796,6 +2796,9 @@ PEG.parser = (function(){
}
var utils = require("./utils");
var result = parseFunctions[startRule]();
/*

@ -1,3 +1,7 @@
{
var utils = require("./utils");
}
grammar
= __ initializer:initializer? rules:rule+ {
return {
@ -37,7 +41,7 @@ expression
choice
= head:sequence tail:(slash sequence)* {
if (tail.length > 0) {
var alternatives = [head].concat(map(
var alternatives = [head].concat(utils.map(
tail,
function(element) { return element[1]; }
));
@ -238,10 +242,10 @@ simpleSingleQuotedCharacter
class "character class"
= "[" inverted:"^"? parts:(classCharacterRange / classCharacter)* "]" flags:"i"? __ {
var partsConverted = map(parts, function(part) { return part.data; });
var partsConverted = utils.map(parts, function(part) { return part.data; });
var rawText = "["
+ inverted
+ map(parts, function(part) { return part.rawText; }).join("")
+ utils.map(parts, function(part) { return part.rawText; }).join("")
+ "]"
+ flags;
@ -275,7 +279,7 @@ classCharacter
return {
data: char_,
// FIXME: Get the raw text from the input directly.
rawText: quoteForRegexpClass(char_)
rawText: utils.quoteForRegexpClass(char_)
};
}

@ -1,19 +1,11 @@
/*
* PEG.js 0.7.0
*
* http://pegjs.majda.cz/
*
* Copyright (c) 2010-2012 David Majda
* Licensed under the MIT license.
*/
var PEG = (function(undefined) {
// @include "utils.js"
var PEG = {
module.exports = {
/* PEG.js version (uses semantic versioning). */
VERSION: "0.7.0",
GrammarError: require("./grammar-error"),
parser: require("./parser"),
compiler: require("./compiler"),
/*
* Generates a parser from a specified grammar and returns it.
*
@ -26,26 +18,6 @@ var PEG = {
* generated parser and cause its malfunction.
*/
buildParser: function(grammar, options) {
return PEG.compiler.compile(PEG.parser.parse(grammar), options);
return this.compiler.compile(this.parser.parse(grammar), options);
}
};
/* Thrown when the grammar contains an error. */
PEG.GrammarError = function(message) {
this.name = "GrammarError";
this.message = message;
};
subclass(PEG.GrammarError, Error);
// @include "parser.js"
// @include "compiler.js"
return PEG;
})();
if (typeof module !== "undefined") {
module.exports = PEG;
}

@ -1,5 +1,6 @@
/* Like Python's |range|, but without |step|. */
function range(start, stop) {
var utils = {
/* Like Python's |range|, but without |step|. */
range: function(start, stop) {
if (stop === undefined) {
stop = start;
start = 0;
@ -10,18 +11,18 @@ function range(start, stop) {
result[i] = j;
}
return result;
}
},
function find(array, callback) {
find: function(array, callback) {
var length = array.length;
for (var i = 0; i < length; i++) {
if (callback(array[i])) {
return array[i];
}
}
}
},
function contains(array, value) {
contains: function(array, value) {
/*
* Stupid IE does not have Array.prototype.indexOf, otherwise this function
* would be a one-liner.
@ -33,77 +34,77 @@ function contains(array, value) {
}
}
return false;
}
},
function each(array, callback) {
each: function(array, callback) {
var length = array.length;
for (var i = 0; i < length; i++) {
callback(array[i], i);
}
}
},
function map(array, callback) {
map: function(array, callback) {
var result = [];
var length = array.length;
for (var i = 0; i < length; i++) {
result[i] = callback(array[i], i);
}
return result;
}
},
function pluck(array, key) {
return map(array, function (e) { return e[key]; });
}
pluck: function(array, key) {
return utils.map(array, function (e) { return e[key]; });
},
function keys(object) {
keys: function(object) {
var result = [];
for (var key in object) {
result.push(key);
}
return result;
}
},
function values(object) {
values: function(object) {
var result = [];
for (var key in object) {
result.push(object[key]);
}
return result;
}
},
function clone(object) {
clone: function(object) {
var result = {};
for (var key in object) {
result[key] = object[key];
}
return result;
}
},
function defaults(object, defaults) {
defaults: function(object, defaults) {
for (var key in defaults) {
if (object[key] === undefined) {
object[key] = defaults[key];
}
}
}
},
/*
/*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
function subclass(child, parent) {
subclass: function(child, parent) {
function ctor() { this.constructor = child; }
ctor.prototype = parent.prototype;
child.prototype = new ctor();
}
},
/*
/*
* Returns a string padded on the left to a desired length with a character.
*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
function padLeft(input, padding, length) {
padLeft: function(input, padding, length) {
var result = input;
var padLength = length - input.length;
@ -112,16 +113,16 @@ function padLeft(input, padding, length) {
}
return result;
}
},
/*
/*
* Returns an escape sequence for given character. Uses \x for characters <=
* 0xFF to save space, \u for the rest.
*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
function escape(ch) {
escape: function(ch) {
var charCode = ch.charCodeAt(0);
var escapeChar;
var length;
@ -134,22 +135,22 @@ function escape(ch) {
length = 4;
}
return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
}
return '\\' + escapeChar + utils.padLeft(charCode.toString(16).toUpperCase(), '0', length);
},
/*
/*
* Surrounds the string with quotes and escapes characters inside so that the
* result is a valid JavaScript string.
*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
function quote(s) {
quote: function(s) {
/*
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
* literal except for the closing quote character, backslash, carriage return,
* line separator, paragraph separator, and line feed. Any character may
* appear in the form of an escape sequence.
* literal except for the closing quote character, backslash, carriage
* return, line separator, paragraph separator, and line feed. Any character
* may appear in the form of an escape sequence.
*
* For portability, we also escape all control and non-ASCII
* characters. Note that "\0" and "\v" escape sequences are not used because
@ -163,15 +164,15 @@ function quote(s) {
.replace(/\n/g, '\\n') // line feed
.replace(/\f/g, '\\f') // form feed
.replace(/\r/g, '\\r') // carriage return
.replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, escape)
.replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, utils.escape)
+ '"';
}
},
/*
/*
* Escapes characters inside the string so that it can be used as a list of
* characters in a character class of a regular expression.
*/
function quoteForRegexpClass(s) {
quoteForRegexpClass: function(s) {
/*
* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
*
@ -189,21 +190,25 @@ function quoteForRegexpClass(s) {
.replace(/\v/g, '\\x0B') // vertical tab
.replace(/\f/g, '\\f') // form feed
.replace(/\r/g, '\\r') // carriage return
.replace(/[\x01-\x08\x0E-\x1F\x80-\uFFFF]/g, escape);
}
.replace(/[\x01-\x08\x0E-\x1F\x80-\uFFFF]/g, utils.escape);
},
/*
/*
* Builds a node visitor -- a function which takes a node and any number of
* other parameters, calls an appropriate function according to the node type,
* passes it all its parameters and returns its value. The functions for various
* node types are passed in a parameter to |buildNodeVisitor| as a hash.
* passes it all its parameters and returns its value. The functions for
* various node types are passed in a parameter to |buildNodeVisitor| as a
* hash.
*/
function buildNodeVisitor(functions) {
buildNodeVisitor: function(functions) {
return function(node) {
return functions[node.type].apply(null, arguments);
};
}
},
findRuleByName: function(ast, name) {
return utils.find(ast.rules, function(r) { return r.name === name; });
}
};
function findRuleByName(ast, name) {
return find(ast.rules, function(r) { return r.name === name; });
}
module.exports = utils;

Loading…
Cancel
Save