Browse Source

Split the source code into several files, introduce build system

The source code is now in the src directory. The library needs to be
built using "rake", which creates the lib/peg.js file by combining the
source files.
redux
David Majda 11 years ago
parent
commit
e59f3ba338
  1. 2
      .gitignore
  2. 9
      README.md
  3. 29
      Rakefile
  4. 3
      benchmark/index.html
  5. 3
      bin/pegjs-main.js
  6. 127
      src/checks.js
  7. 31
      src/compiler.js
  8. 369
      src/emitter.js
  9. 0
      src/parser.js
  10. 0
      src/parser.pegjs
  11. 83
      src/passes.js
  12. 43
      src/peg.js
  13. 94
      src/utils.js
  14. 5
      test/index.html
  15. 0
      test/parser-test.js

2
.gitignore

@ -1 +1 @@
lib/*.min.js
lib/*

9
README.md

@ -14,6 +14,15 @@ Features
* Handles wide class of grammars (superset of LL(*k*) and LR(*k*))
* Precise and human-friendly error reporting
Building
--------
To build PEG.js, simply run the `rake` command:
$ rake
Of course, you need to have [Rake](http://rake.rubyforge.org/) installed. The command creates the PEG.js library in `lib/peg.js` by processing the files in the `src` directory.
Usage
-----

29
Rakefile

@ -1,4 +1,31 @@
# Directory holding the source files; the build reads "#{SRC_DIR}/peg.js" and
# resolves @include directives relative to it.
SRC_DIR = "src"
# Directory the combined peg.js file is written to.
LIB_DIR = "lib"
# Directory containing the pegjs command-line script used by :metaparser.
BIN_DIR = "bin"
# Expands include directives in +input+ and returns the resulting text.
#
# A directive is a line consisting solely of a JavaScript line comment of the
# form
#
#   // @include "file.js"
#
# (surrounding whitespace allowed). Each directive line is replaced by the
# recursively preprocessed contents of the named file. All other lines are
# passed through unchanged. Lines are split and re-joined on "\n".
#
# input         - String with the text to preprocess.
# base_dir      - Directory against which every included file name is
#                 resolved (nested includes use the same directory).
# include_stack - Internal: files currently being expanded, used to detect
#                 circular includes. Callers should not pass it.
#
# Aborts the process (SystemExit) when an included file does not exist or when
# a file includes itself, directly or through other files. Without the latter
# check a circular include would recurse until the stack overflows.
def preprocess(input, base_dir, include_stack = [])
  input.split("\n").map do |line|
    match = line.match(/^\s*\/\/\s*@include\s*"([^"]*)"\s*$/)
    next line unless match

    included_file = "#{base_dir}/#{match[1]}"
    unless File.exist?(included_file)
      abort "Included file \"#{included_file}\" does not exist."
    end
    if include_stack.include?(included_file)
      abort "Circular include of \"#{included_file}\" detected."
    end

    # Only the chain of files leading to this point is tracked, so including
    # the same file twice from different places remains allowed.
    preprocess(File.read(included_file), base_dir, include_stack + [included_file])
  end.join("\n")
end
# Rake task that runs the pegjs command-line script on the grammar file.
# The return value of each `system` call is ignored, so a failing command does
# not fail the task.
desc "Generate the grammar parser"
task :metaparser do
# NOTE(review): this command uses a hard-coded bin/ path and a grammar under
# lib/; it looks superseded by the command below, which uses BIN_DIR/SRC_DIR.
# Confirm whether it should still be run.
system "bin/pegjs PEG.parser lib/metagrammar.pegjs"
# Runs "#{BIN_DIR}/pegjs" with "PEG.parser" as its first argument (presumably
# the name the generated parser is exported under; verify against the pegjs
# script) and the grammar in SRC_DIR as input.
system "#{BIN_DIR}/pegjs PEG.parser #{SRC_DIR}/parser.pegjs"
end
# Builds "#{LIB_DIR}/peg.js" by expanding the @include directives found in
# "#{SRC_DIR}/peg.js" (see preprocess).
desc "Build the peg.js file"
task :build do
  # Preprocess before touching the output file: if an include is missing and
  # preprocess aborts, an existing lib/peg.js is left intact instead of being
  # truncated to an empty file.
  output = preprocess(File.read("#{SRC_DIR}/peg.js"), SRC_DIR)

  # The output directory holds only generated files and may not exist yet.
  Dir.mkdir(LIB_DIR) unless File.directory?(LIB_DIR)

  File.open("#{LIB_DIR}/peg.js", "w") do |f|
    f.write(output)
  end
end

# Running plain `rake` builds the library.
task :default => :build

3
benchmark/index.html

@ -60,8 +60,7 @@
</tr>
</table>
<script src="../lib/compiler.js"></script>
<script src="../lib/metagrammar.js"></script>
<script src="../lib/peg.js"></script>
<script src="../vendor/jquery/jquery.js"></script>
<script src="../vendor/jquery.scrollto/jquery.scrollTo.js"></script>
<script>

3
bin/pegjs-main.js

@ -5,8 +5,7 @@ importPackage(java.lang);
* Rhino does not have __FILE__ or anything similar so we have to pass the
* script path from the outside.
*/
load(arguments[0] + "/../lib/compiler.js");
load(arguments[0] + "/../lib/metagrammar.js");
load(arguments[0] + "/../lib/peg.js");
var FILE_STDIN = "-";
var FILE_STDOUT = "-";

127
src/checks.js

@ -0,0 +1,127 @@
/*
 * Checks made on the grammar AST before compilation. Each check is a function
 * that is passed the AST and does not return anything. If the check passes, the
 * function does not do anything special, otherwise it throws
 * |PEG.GrammarError|. The checks are run in sequence in order of their
 * definition.
 *
 * Both checks walk the AST with a table that maps a node type to a handler
 * function; a node whose type has no handler makes the walk fail with a
 * |TypeError|.
 */
PEG.compiler.checks = [
  /*
   * Checks that all referenced rules exist.
   *
   * Throws |PEG.GrammarError| for the first |rule_ref| node whose name is not
   * a rule of the grammar.
   */
  function(ast) {
    function nop() {}

    function checkExpression(node) { check(node.expression); }

    function checkSubnodes(propertyName) {
      return function(node) {
        PEG.ArrayUtils.each(node[propertyName], check);
      };
    }

    /*
     * Rules live in a plain object keyed by rule name. A bare property lookup
     * would also find members inherited from |Object.prototype|, so a
     * reference to an undefined rule named e.g. "toString" or "constructor"
     * would be accepted. Only own, defined properties count as rules.
     */
    function ruleExists(name) {
      return Object.prototype.hasOwnProperty.call(ast.rules, name)
        && ast.rules[name] !== undefined;
    }

    var checkFunctions = {
      grammar:
        function(node) {
          for (var name in node.rules) {
            check(node.rules[name]);
          }
        },

      rule:         checkExpression,
      choice:       checkSubnodes("alternatives"),
      sequence:     checkSubnodes("elements"),
      labeled:      checkExpression,
      simple_and:   checkExpression,
      simple_not:   checkExpression,
      semantic_and: nop,
      semantic_not: nop,
      optional:     checkExpression,
      zero_or_more: checkExpression,
      one_or_more:  checkExpression,
      action:       checkExpression,

      rule_ref:
        function(node) {
          if (!ruleExists(node.name)) {
            throw new PEG.GrammarError(
              "Referenced rule \"" + node.name + "\" does not exist."
            );
          }
        },

      literal:      nop,
      any:          nop,
      "class":      nop
    };

    function check(node) { checkFunctions[node.type](node); }

    check(ast);
  },

  /*
   * Checks that no left recursion is present.
   *
   * |appliedRules| is the list of names of the rules entered on the way to the
   * current node. Reaching a |rule_ref| to a rule that is already in the list
   * throws |PEG.GrammarError|. This check dereferences rule references
   * without verifying them, so it relies on the previous check having passed.
   */
  function(ast) {
    function nop() {}

    function checkExpression(node, appliedRules) {
      check(node.expression, appliedRules);
    }

    var checkFunctions = {
      grammar:
        function(node, appliedRules) {
          for (var name in node.rules) {
            check(ast.rules[name], appliedRules);
          }
        },

      rule:
        function(node, appliedRules) {
          /* |concat| copies the list, so sibling branches are not affected. */
          check(node.expression, appliedRules.concat(node.name));
        },

      choice:
        function(node, appliedRules) {
          PEG.ArrayUtils.each(node.alternatives, function(alternative) {
            check(alternative, appliedRules);
          });
        },

      sequence:
        function(node, appliedRules) {
          /* Only the first element of a sequence is inspected. */
          if (node.elements.length > 0) {
            check(node.elements[0], appliedRules);
          }
        },

      labeled:      checkExpression,
      simple_and:   checkExpression,
      simple_not:   checkExpression,
      semantic_and: nop,
      semantic_not: nop,
      optional:     checkExpression,
      zero_or_more: checkExpression,
      one_or_more:  checkExpression,
      action:       checkExpression,

      rule_ref:
        function(node, appliedRules) {
          if (PEG.ArrayUtils.contains(appliedRules, node.name)) {
            throw new PEG.GrammarError(
              "Left recursion detected for rule \"" + node.name + "\"."
            );
          }
          check(ast.rules[node.name], appliedRules);
        },

      literal:      nop,
      any:          nop,
      "class":      nop
    };

    function check(node, appliedRules) {
      checkFunctions[node.type](node, appliedRules);
    }

    check(ast, []);
  }
];

31
src/compiler.js

@ -0,0 +1,31 @@
/* ===== PEG.compiler ===== */
/*
* Namespace object for the compiler. The |checks|, |passes| and |emitter|
* members used by |compile| are not defined here; they are attached by the
* files pulled in by the include directives that follow this object.
*/
PEG.compiler = {
/*
* Generates a parser from a specified grammar AST. Throws |PEG.GrammarError|
* if the AST contains a semantic error. Note that not all errors are detected
* during the generation and some may protrude to the generated parser and
* cause its malfunction.
*
* Returns the value produced by evaluating the emitted source, with the source
* text itself stored in its |_source| property.
*/
compile: function(ast) {
/* Run every check in order; a failing check throws and stops compilation. */
for (var i = 0; i < this.checks.length; i++) {
this.checks[i](ast);
}
/* Run every pass in order, feeding each one the AST returned by the previous. */
for (var i = 0; i < this.passes.length; i++) {
ast = this.passes[i](ast);
}
var source = this.emitter(ast);
/*
* Direct eval: the emitted source is evaluated in this function's scope, so
* the local names here are visible to it. Keep them stable.
*/
var result = eval(source);
result._source = source;
return result;
}
};
/* ===== Includes ===== */
// @include "checks.js"
// @include "passes.js"
// @include "emitter.js"

369
lib/compiler.js → src/emitter.js

@ -1,370 +1,3 @@
/* PEG.js compiler. */
(function(undefined) {
/* ===== PEG ===== */
/* no var */ PEG = {};
/*
* Generates a parser from a specified grammar and returns it.
*
* The grammar must be a string in the format described by the metagramar in the
* metagrammar.pegjs file.
*
* Throws |PEG.parser.SyntaxError| if the grammar contains a syntax error or
* |PEG.GrammarError| if it contains a semantic error. Note that not all errors
* are detected during the generation and some may protrude to the generated
* parser and cause its malfunction.
*/
PEG.buildParser = function(grammar) {
return PEG.compiler.compile(PEG.parser.parse(grammar));
};
/* ===== PEG.GrammarError ===== */
/* Thrown when the grammar contains an error. */
PEG.GrammarError = function(message) {
this.name = "PEG.GrammarError";
this.message = message;
};
PEG.GrammarError.prototype = Error.prototype;
/* ===== PEG.ArrayUtils ===== */
/* Array manipulation utility functions. */
PEG.ArrayUtils = {
/*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
contains: function(array, value) {
/*
* Stupid IE does not have Array.prototype.indexOf, otherwise this function
* would be a one-liner.
*/
var length = array.length;
for (var i = 0; i < length; i++) {
if (array[i] === value) {
return true;
}
}
return false;
},
each: function(array, callback) {
var length = array.length;
for (var i = 0; i < length; i++) {
callback(array[i]);
}
},
map: function(array, callback) {
var result = [];
var length = array.length;
for (var i = 0; i < length; i++) {
result[i] = callback(array[i]);
}
return result;
}
};
/* ===== PEG.StringUtils ===== */
/* String manipulation utility functions. */
PEG.StringUtils = {
/*
* Surrounds the string with quotes and escapes characters inside so that the
* result is a valid JavaScript string.
*
* The code needs to be in sync with th code template in the compilation
* function for "action" nodes.
*/
quote: function(s) {
/*
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
* literal except for the closing quote character, backslash, carriage
* return, line separator, paragraph separator, and line feed. Any character
* may appear in the form of an escape sequence.
*/
return '"' + s
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // closing quote character
.replace(/\r/g, '\\r') // carriage return
.replace(/\u2028/g, '\\u2028') // line separator
.replace(/\u2029/g, '\\u2029') // paragraph separator
.replace(/\n/g, '\\n') // line feed
+ '"';
}
};
/* ===== PEG.RegExpUtils ===== */
/* RegExp manipulation utility functions. */
PEG.RegExpUtils = {
/*
* Escapes characters inside the string so that it can be used as a list of
* characters in a character class of a regular expression.
*/
quoteForClass: function(s) {
/* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
return s
.replace(/\\/g, '\\\\') // backslash
.replace(/\0/g, '\\0') // null, IE needs this
.replace(/\//g, '\\/') // closing slash
.replace(/]/g, '\\]') // closing bracket
.replace(/-/g, '\\-') // dash
.replace(/\r/g, '\\r') // carriage return
.replace(/\u2028/g, '\\u2028') // line separator
.replace(/\u2029/g, '\\u2029') // paragraph separator
.replace(/\n/g, '\\n') // line feed
}
};
/* ===== PEG.compiler ===== */
PEG.compiler = {
/*
* Generates a parser from a specified grammar AST. Throws |PEG.GrammarError|
* if the AST contains a semantic error. Note that not all errors are detected
* during the generation and some may protrude to the generated parser and
* cause its malfunction.
*/
compile: function(ast) {
for (var i = 0; i < this.checks.length; i++) {
this.checks[i](ast);
}
for (var i = 0; i < this.passes.length; i++) {
ast = this.passes[i](ast);
}
var source = this.emitter(ast);
var result = eval(source);
result._source = source;
return result;
}
};
/*
* Checks made on the grammar AST before compilation. Each check is a function
* that is passed the AST and does not return anything. If the check passes, the
* function does not do anything special, otherwise it throws
* |PEG.GrammarError|. The checks are run in sequence in order of their
* definition.
*/
PEG.compiler.checks = [
/* Checks that all referenced rules exist. */
function(ast) {
function nop() {}
function checkExpression(node) { check(node.expression); }
function checkSubnodes(propertyName) {
return function(node) {
PEG.ArrayUtils.each(node[propertyName], check);
};
}
var checkFunctions = {
grammar:
function(node) {
for (var name in node.rules) {
check(node.rules[name]);
}
},
rule: checkExpression,
choice: checkSubnodes("alternatives"),
sequence: checkSubnodes("elements"),
labeled: checkExpression,
simple_and: checkExpression,
simple_not: checkExpression,
semantic_and: nop,
semantic_not: nop,
optional: checkExpression,
zero_or_more: checkExpression,
one_or_more: checkExpression,
action: checkExpression,
rule_ref:
function(node) {
if (ast.rules[node.name] === undefined) {
throw new PEG.GrammarError(
"Referenced rule \"" + node.name + "\" does not exist."
);
}
},
literal: nop,
any: nop,
"class": nop
};
function check(node) { checkFunctions[node.type](node); }
check(ast);
},
/* Checks that no left recursion is present. */
function(ast) {
function nop() {}
function checkExpression(node, appliedRules) {
check(node.expression, appliedRules);
}
var checkFunctions = {
grammar:
function(node, appliedRules) {
for (var name in node.rules) {
check(ast.rules[name], appliedRules);
}
},
rule:
function(node, appliedRules) {
check(node.expression, appliedRules.concat(node.name));
},
choice:
function(node, appliedRules) {
PEG.ArrayUtils.each(node.alternatives, function(alternative) {
check(alternative, appliedRules);
});
},
sequence:
function(node, appliedRules) {
if (node.elements.length > 0) {
check(node.elements[0], appliedRules);
}
},
labeled: checkExpression,
simple_and: checkExpression,
simple_not: checkExpression,
semantic_and: nop,
semantic_not: nop,
optional: checkExpression,
zero_or_more: checkExpression,
one_or_more: checkExpression,
action: checkExpression,
rule_ref:
function(node, appliedRules) {
if (PEG.ArrayUtils.contains(appliedRules, node.name)) {
throw new PEG.GrammarError(
"Left recursion detected for rule \"" + node.name + "\"."
);
}
check(ast.rules[node.name], appliedRules);
},
literal: nop,
any: nop,
"class": nop
};
function check(node, appliedRules) {
checkFunctions[node.type](node, appliedRules);
}
check(ast, []);
}
];
/*
* Optimalization passes made on the grammar AST before compilation. Each pass
* is a function that is passed the AST and returns a new AST. The AST can be
* modified in-place by the pass. The passes are run in sequence in order of
* their definition.
*/
PEG.compiler.passes = [
/*
* Removes proxy rules -- that is, rules that only delegate to other rule.
*/
function(ast) {
function isProxyRule(node) {
return node.type === "rule" && node.expression.type === "rule_ref";
}
function replaceRuleRefs(ast, from, to) {
function nop() {}
function replaceInExpression(node, from, to) {
replace(node.expression, from, to);
}
function replaceInSubnodes(propertyName) {
return function(node, from, to) {
PEG.ArrayUtils.each(node[propertyName], function(node) {
replace(node, from, to);
});
};
}
var replaceFunctions = {
grammar:
function(node, from, to) {
for (var name in node.rules) {
replace(ast.rules[name], from, to);
}
},
rule: replaceInExpression,
choice: replaceInSubnodes("alternatives"),
sequence: replaceInSubnodes("elements"),
labeled: replaceInExpression,
simple_and: replaceInExpression,
simple_not: replaceInExpression,
semantic_and: nop,
semantic_not: nop,
optional: replaceInExpression,
zero_or_more: replaceInExpression,
one_or_more: replaceInExpression,
action: replaceInExpression,
rule_ref:
function(node, from, to) {
if (node.name === from) {
node.name = to;
}
},
literal: nop,
any: nop,
"class": nop
};
function replace(node, from, to) {
replaceFunctions[node.type](node, from, to);
}
replace(ast, from, to);
}
for (var name in ast.rules) {
if (isProxyRule(ast.rules[name])) {
replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name);
if (name === ast.startRule) {
ast.startRule = ast.rules[name].expression.name;
}
delete ast.rules[name];
}
}
return ast;
}
];
/* Emits the generated code for the AST. */
PEG.compiler.emitter = function(ast) {
/*
@ -1085,5 +718,3 @@ PEG.compiler.emitter = function(ast) {
return emit(ast);
};
})();

0
lib/metagrammar.js → src/parser.js

0
lib/metagrammar.pegjs → src/parser.pegjs

83
src/passes.js

@ -0,0 +1,83 @@
/*
 * Optimization passes made on the grammar AST before compilation. Every pass
 * is a function that receives the AST and returns an AST; it is allowed to
 * modify the AST it was given in place. Passes run one after another in the
 * order in which they are defined here.
 */
PEG.compiler.passes = [
  /*
   * Removes proxy rules, i.e. rules whose whole expression is a reference to
   * another rule. References to a proxy rule are redirected to the rule it
   * points to, the start rule is updated if it was a proxy, and the proxy
   * rule is deleted from the grammar.
   */
  function(ast) {
    function isProxyRule(node) {
      if (node.type !== "rule") {
        return false;
      }
      return node.expression.type === "rule_ref";
    }

    /* Renames every reference to rule |from| in |ast| to |to|. */
    function replaceRuleRefs(ast, from, to) {
      function skip() {}

      function descend(node) {
        visit(node.expression);
      }

      function descendInto(listName) {
        return function(node) {
          PEG.ArrayUtils.each(node[listName], function(child) {
            visit(child);
          });
        };
      }

      function renameIfMatching(node) {
        if (node.name === from) {
          node.name = to;
        }
      }

      function visitRules(node) {
        for (var ruleName in node.rules) {
          visit(ast.rules[ruleName]);
        }
      }

      /* Handler per node type; leaf types that hold no references are skipped. */
      var visitors = {
        grammar:      visitRules,
        rule:         descend,
        choice:       descendInto("alternatives"),
        sequence:     descendInto("elements"),
        labeled:      descend,
        simple_and:   descend,
        simple_not:   descend,
        semantic_and: skip,
        semantic_not: skip,
        optional:     descend,
        zero_or_more: descend,
        one_or_more:  descend,
        action:       descend,
        rule_ref:     renameIfMatching,
        literal:      skip,
        any:          skip,
        "class":      skip
      };

      function visit(node) {
        visitors[node.type](node);
      }

      visit(ast);
    }

    for (var name in ast.rules) {
      var rule = ast.rules[name];

      if (!isProxyRule(rule)) {
        continue;
      }

      replaceRuleRefs(ast, rule.name, rule.expression.name);
      if (name === ast.startRule) {
        ast.startRule = rule.expression.name;
      }
      delete ast.rules[name];
    }

    return ast;
  }
];

43
src/peg.js

@ -0,0 +1,43 @@
/*
 * Library entry file. Everything is defined inside one function so that only
 * the |PEG| object is exported, as a property of |global| (the value of |this|
 * where the file is evaluated). The |undefined| parameter is never passed, so
 * inside the function it reliably holds the undefined value.
 *
 * The "@include" line comments below are directives for the build script: each
 * one is replaced by the contents of the named file. They must stay on lines
 * of their own.
 */
(function(global, undefined) {

/* ===== PEG ===== */

var PEG = {
  /*
   * Generates a parser from a specified grammar and returns it.
   *
   * The grammar must be a string in the format described by the metagrammar in
   * the parser.pegjs file.
   *
   * Throws |PEG.parser.SyntaxError| if the grammar contains a syntax error or
   * |PEG.GrammarError| if it contains a semantic error. Note that not all
   * errors are detected during the generation and some may protrude to the
   * generated parser and cause its malfunction.
   */
  buildParser: function(grammar) {
    return PEG.compiler.compile(PEG.parser.parse(grammar));
  }
};

/* ===== PEG.GrammarError ===== */

/* Thrown when the grammar contains an error. */
PEG.GrammarError = function(message) {
  this.name = "PEG.GrammarError";
  this.message = message;
};

/*
 * Inherit from |Error| through an intermediate object instead of reusing
 * |Error.prototype| itself. Sharing the very same prototype object would make
 * every |Error| an |instanceof PEG.GrammarError| and would leak any property
 * later added to the prototype onto all errors. The surrogate constructor
 * avoids calling |Error| and works in engines without |Object.create|.
 */
(function() {
  function Surrogate() {}
  Surrogate.prototype = Error.prototype;

  PEG.GrammarError.prototype = new Surrogate();
  PEG.GrammarError.prototype.constructor = PEG.GrammarError;
})();

/* ===== Includes ===== */

// @include "utils.js"
// @include "parser.js"
// @include "compiler.js"

/* ===== Export ===== */

global.PEG = PEG;

})(this);

94
src/utils.js

@ -0,0 +1,94 @@
/* ===== PEG.ArrayUtils ===== */

/*
 * Small helpers for working with arrays. All of them read the array length
 * once, before iterating, and visit the elements from index 0 upwards.
 */
PEG.ArrayUtils = {
  /*
   * Returns |true| if some element of |array| is strictly equal (|===|) to
   * |value|, |false| otherwise.
   *
   * The code needs to be in sync with the code template in the compilation
   * function for "action" nodes.
   */
  contains: function(array, value) {
    /*
     * Written as an explicit loop because IE lacks
     * |Array.prototype.indexOf|.
     */
    var count = array.length;
    var index = 0;

    while (index < count) {
      if (array[index] === value) {
        return true;
      }
      index++;
    }

    return false;
  },

  /* Calls |callback| with each element of |array| in turn. */
  each: function(array, callback) {
    var count = array.length;
    var index = 0;

    while (index < count) {
      callback(array[index]);
      index++;
    }
  },

  /*
   * Returns a new array holding the result of calling |callback| on each
   * element of |array|, in the same order.
   */
  map: function(array, callback) {
    var count = array.length;
    var mapped = [];

    for (var index = 0; index < count; index++) {
      mapped[index] = callback(array[index]);
    }

    return mapped;
  }
};
/* ===== PEG.StringUtils ===== */

/* Helpers for working with strings. */
PEG.StringUtils = {
  /*
   * Returns |s| wrapped in double quotes, with the characters that cannot
   * appear literally inside a JavaScript string literal replaced by escape
   * sequences, so that the result is a valid JavaScript string literal.
   *
   * The code needs to be in sync with the code template in the compilation
   * function for "action" nodes.
   */
  quote: function(s) {
    /*
     * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
     * literal except for the closing quote character, backslash, carriage
     * return, line separator, paragraph separator, and line feed. Any character
     * may appear in the form of an escape sequence.
     *
     * The substitutions are applied in the listed order. The backslash has to
     * come first, otherwise the backslashes introduced by the later
     * substitutions would get escaped again.
     */
    var substitutions = [
      [/\\/g,     '\\\\'],    // backslash
      [/"/g,      '\\"'],     // closing quote character
      [/\r/g,     '\\r'],     // carriage return
      [/\u2028/g, '\\u2028'], // line separator
      [/\u2029/g, '\\u2029'], // paragraph separator
      [/\n/g,     '\\n']      // line feed
    ];

    var escaped = s;
    for (var k = 0; k < substitutions.length; k++) {
      escaped = escaped.replace(substitutions[k][0], substitutions[k][1]);
    }

    return '"' + escaped + '"';
  }
};
/* ===== PEG.RegExpUtils ===== */

/* RegExp manipulation utility functions. */
PEG.RegExpUtils = {
  /*
   * Escapes characters inside the string so that it can be used as a list of
   * characters in a character class of a regular expression, i.e. between
   * the |[| and |]| of a regular expression literal.
   *
   * Returns the escaped string; the brackets themselves are not added.
   */
  quoteForClass: function(s) {
    /*
     * Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
     *
     * The backslash must be handled first so that the backslashes added by
     * the other replacements are not escaped again. The caret is escaped
     * because a class list starting with an unescaped |^| would turn into a
     * negated class and match the opposite set of characters.
     */
    return s
      .replace(/\\/g, '\\\\')        // backslash
      .replace(/\0/g, '\\0')         // null, IE needs this
      .replace(/\//g, '\\/')         // closing slash
      .replace(/]/g, '\\]')          // closing bracket
      .replace(/\^/g, '\\^')         // caret
      .replace(/-/g, '\\-')          // dash
      .replace(/\r/g, '\\r')         // carriage return
      .replace(/\u2028/g, '\\u2028') // line separator
      .replace(/\u2029/g, '\\u2029') // paragraph separator
      .replace(/\n/g, '\\n');        // line feed
  }
};

5
test/index.html

@ -5,10 +5,9 @@
<title>PEG.js Test Suite</title>
<link rel="stylesheet" href="../vendor/qunit/qunit.css">
<script src="../vendor/qunit/qunit.js"></script>
<script src="../lib/compiler.js"></script>
<script src="../lib/metagrammar.js"></script>
<script src="../lib/peg.js"></script>
<script src="compiler-test.js"></script>
<script src="metagrammar-test.js"></script>
<script src="parser-test.js"></script>
</head>
<body>
<h1 id="qunit-header">PEG.js Test Suite</h1>

0
test/metagrammar-test.js → test/parser-test.js

Loading…
Cancel
Save