Make the generated parsers standalone (no runtime is required).

This also speeds up the benchmark suite execution by 7.83 % on V8.

Detailed results (benchmark suite totals):

---------------------------------
 Test #     Before       After
---------------------------------
      1   26.17 kB/s   28.16 kB/s
      2   26.05 kB/s   28.16 kB/s
      3   25.99 kB/s   28.10 kB/s
      4   26.13 kB/s   28.11 kB/s
      5   26.14 kB/s   28.07 kB/s
---------------------------------
Average   26.10 kB/s   28.14 kB/s
---------------------------------

Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.7 Safari/533.2
redux
David Majda 14 years ago
parent 3f85a9ca84
commit e63f64a3d5

@ -39,18 +39,14 @@ The `bin/pegjs` command has several options that influence the generator—t
Let's assume that you want to use the parser in a web page. To do this, you need to:
1. Download the [minified parser runtime](http://pegjs.majda.cz/download#minified-parser-runtime) and include it into your page:
<script src="pegjs-runtime-0.3.min.js"></script>
2. Include the generated parser into your page:
1. Include the generated parser into your page:
<!-- Replace "example/arithmetics.js" with your parser file -->
<script src="example/arithmetics.js"></script>
This creates a variable with the parser object in the global scope (you can choose name of the variable when generating the parser).
3. Use the parser, i.e. call the `parse` method on the parser variable:
2. Use the parser, i.e. call the `parse` method on the parser variable:
<script>
// Replace "arithmeticsParser" with your parser variable

@ -1,26 +1,3 @@
require "net/http"
require "uri"
# Rake task that produces the minified parser runtime by sending
# lib/runtime.js to the Google Closure Compiler web service.
desc "Build the minified parser runtime"
task :minify do
# Submit the runtime source as form data. The service is asked for
# SIMPLE_OPTIMIZATIONS and to return only the compiled code as plain text.
response = Net::HTTP.post_form(
URI.parse("http://closure-compiler.appspot.com/compile"),
{
"js_code" => File.read("lib/runtime.js"),
"compilation_level" => "SIMPLE_OPTIMIZATIONS",
"output_format" => "text",
"output_info" => "compiled_code"
}
)
# Net::HTTP exposes the status code as a string, hence the comparison with
# "200". Any other status aborts the build with the response message.
if response.code != "200"
abort "Error calling Google Closure Compiler API: #{response.message}"
end
# The output file name embeds the version read from the VERSION file
# (surrounding whitespace stripped).
version = File.read("VERSION").strip
File.open("lib/pegjs-runtime-#{version}.min.js", "w") { |f| f.write(response.body) }
end
desc "Generate the grammar parser"
task :metaparser do
system "bin/pegjs --start-rule grammar PEG.grammarParser lib/metagrammar.pegjs"

@ -41,7 +41,6 @@
</tr>
</table>
<script src="../lib/runtime.js"></script>
<script src="../lib/compiler.js"></script>
<script src="../lib/metagrammar.js"></script>
<script src="../vendor/jquery/jquery.js"></script>

@ -5,7 +5,6 @@ importPackage(java.lang);
* Rhino does not have __FILE__ or anything similar so we have to pass the
* script path from the outside.
*/
load(arguments[0] + "/../lib/runtime.js");
load(arguments[0] + "/../lib/compiler.js");
load(arguments[0] + "/../lib/metagrammar.js");

@ -1,8 +1,4 @@
/*
* PEG.js compiler.
*
* The runtime.js file must be included before this file.
*/
/* PEG.js compiler. */
(function() {
@ -10,6 +6,8 @@ function nop() {}
/* ===== PEG ===== */
/* no var */ PEG = {};
/*
* Generates a parser from a specified grammar and start rule and returns it.
*
@ -53,6 +51,82 @@ PEG.buildParser = function(grammar, startRule) {
return PEG.Compiler.compileParser(ast, startRule);
};
/* ===== PEG.ArrayUtils ===== */
/* Array manipulation utility functions. */
PEG.ArrayUtils = {
  /*
   * Calls |callback| with each element of |array|, in index order. The length
   * is read once before the loop starts, so elements appended while iterating
   * are not visited.
   */
  each: function(array, callback) {
    var count = array.length;
    var index = 0;
    while (index < count) {
      callback(array[index]);
      index++;
    }
  },

  /*
   * Returns a new array whose element at each index is the value |callback|
   * returned for the element of |array| at the same index.
   */
  map: function(array, callback) {
    var mapped = [];
    var count = array.length;
    var index = 0;
    while (index < count) {
      mapped[index] = callback(array[index]);
      index++;
    }
    return mapped;
  }
};
/* ===== PEG.StringUtils ===== */
/* String manipulation utility functions. */
PEG.StringUtils = {
  /*
   * Wraps |s| in double quotes and escapes its content so that the result is
   * a valid JavaScript string literal.
   *
   * The generated parsers carry their own copy of this logic (the
   * |_quoteString| function in the parser code template); the two need to be
   * kept in sync.
   */
  quote: function(s) {
    /*
     * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
     * literal except for the closing quote character, backslash, carriage
     * return, line separator, paragraph separator, and line feed. Any character
     * may appear in the form of an escape sequence.
     */
    var escapes = {
      '\\':     '\\\\',    // backslash
      '"':      '\\"',     // closing quote character
      '\r':     '\\r',     // carriage return
      '\u2028': '\\u2028', // line separator
      '\u2029': '\\u2029', // paragraph separator
      '\n':     '\\n'      // line feed
    };

    /*
     * One pass is enough: none of the escape sequences contains a character
     * that would itself need escaping afterwards.
     */
    var escaped = s.replace(/[\\"\r\u2028\u2029\n]/g, function(ch) {
      return escapes[ch];
    });

    return '"' + escaped + '"';
  }
};
/* ===== PEG.RegExpUtils ===== */
/* RegExp manipulation utility functions. */
PEG.RegExpUtils = {
  /*
   * Escapes characters inside the string so that it can be used as a list of
   * characters in a character class of a regular expression.
   *
   * Takes the raw characters as a string |s| and returns the escaped string,
   * ready to be placed between "[" and "]".
   */
  quoteForClass: function(s) {
    /* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
    return s
      .replace(/\\/g, '\\\\')        // backslash (must come first)
      .replace(/\//g, '\\/')         // closing slash
      .replace(/]/g, '\\]')          // closing bracket
      /*
       * A caret in the first position would negate the whole class, so it is
       * escaped everywhere to keep the result position-independent.
       */
      .replace(/\^/g, '\\^')         // caret
      .replace(/-/g, '\\-')          // dash
      .replace(/\r/g, '\\r')         // carriage return
      .replace(/\u2028/g, '\\u2028') // line separator
      .replace(/\u2029/g, '\\u2029') // paragraph separator
      .replace(/\n/g, '\\n');        // line feed
  }
};
/* ===== PEG.Grammar ===== */
/* Namespace with grammar AST nodes. */
@ -348,9 +422,174 @@ PEG.Compiler = {
var source = this.formatCode(
"(function(){",
" var result = new PEG.Parser(${startRule|string});",
" var result = {",
" _startRule: ${startRule|string},",
" ",
/* This needs to be in sync with PEG.StringUtils.quote. */
" _quoteString: function(s) {",
" /*",
" * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string",
" * literal except for the closing quote character, backslash, carriage",
" * return, line separator, paragraph separator, and line feed. Any character",
" * may appear in the form of an escape sequence.",
" */",
" return '\"' + s",
" .replace(/\\\\/g, '\\\\\\\\') // backslash",
" .replace(/\"/g, '\\\\\"') // closing quote character",
" .replace(/\\r/g, '\\\\r') // carriage return",
" .replace(/\\u2028/g, '\\\\u2028') // line separator",
" .replace(/\\u2029/g, '\\\\u2029') // paragraph separator",
" .replace(/\\n/g, '\\\\n') // line feed",
" + '\"';",
" },",
" ",
" _matchFailed: function(failure) {",
" if (this._pos > this._rightmostMatchFailuresPos) {",
" this._rightmostMatchFailuresPos = this._pos;",
" this._rightmostMatchFailuresExpected = [];",
" }",
" ",
" if (this._rightmostMatchFailuresExpected.indexOf(failure) === -1) {",
" this._rightmostMatchFailuresExpected.push(failure);",
" }",
" },",
" ",
" ${parseFunctionDefinitions}",
" ",
" /*",
" * Parses the input with a generated parser. If the parsing is successfull,",
" * returns a value explicitly or implicitly specified by the grammar from",
" * which the parser was generated (see |PEG.buildParser|). If the parsing is",
" * unsuccessful, throws |PEG.grammarParser.SyntaxError| describing the error.",
" */",
" parse: function(input) {",
" var that = this;",
" ",
" function initialize() {",
" that._input = input;",
" that._pos = 0;",
" that._rightmostMatchFailuresPos = 0;",
" that._rightmostMatchFailuresExpected = [];",
" that._cache = {};",
" }",
" ",
" function buildErrorMessage() {",
" function buildExpected(failuresExpected) {",
" switch (failuresExpected.length) {",
" case 0:",
" return 'end of input';",
" case 1:",
" return failuresExpected[0];",
" default:",
" failuresExpected.sort();",
" return failuresExpected.slice(0, failuresExpected.length - 1).join(', ')",
" + ' or '",
" + failuresExpected[failuresExpected.length - 1];",
" }",
" }",
" ",
" var expected = buildExpected(that._rightmostMatchFailuresExpected);",
" var pos = Math.max(that._pos, that._rightmostMatchFailuresPos);",
" var actual = pos < that._input.length",
" ? that._quoteString(that._input.charAt(pos))",
" : 'end of input';",
" ",
" return 'Expected ' + expected + ' but ' + actual + ' found.';",
" }",
" ",
" function computeErrorPosition() {",
" /*",
" * The first idea was to use |String.split| to break the input up to the",
" * error position along newlines and derive the line and column from",
" * there. However IE's |split| implementation is so broken that it was",
" * enough to prevent it.",
" */",
" ",
" var input = that._input;",
" var pos = that._rightmostMatchFailuresPos;",
" var line = 1;",
" var column = 1;",
" var seenCR = false;",
" ",
" for (var i = 0; i < pos; i++) {",
" var ch = input.charAt(i);",
" if (ch === '\\n') {",
" if (!seenCR) { line++; }",
" column = 1;",
" seenCR = false;",
" } else if (ch === '\\r' | ch === '\\u2028' || ch === '\\u2029') {",
" line++;",
" column = 1;",
" seenCR = true;",
" } else {",
" column++;",
" seenCR = false;",
" }",
" }",
" ",
" return { line: line, column: column };",
" }",
" ",
" initialize();",
" ",
" var initialContext = {",
" reportMatchFailures: true",
" };",
" ",
" var result = this['_parse_' + this._startRule](initialContext);",
" ",
" /*",
" * The parser is now in one of the following three states:",
" *",
" * 1. The parser successfully parsed the whole input.",
" *",
" * - |result !== null|",
" * - |that._pos === input.length|",
" * - |that._rightmostMatchFailuresExpected.length| may or may not contain",
" * something",
" *",
" * 2. The parser successfully parsed only a part of the input.",
" *",
" * - |result !== null|",
" * - |that._pos < input.length|",
" * - |that._rightmostMatchFailuresExpected.length| may or may not contain",
" * something",
" *",
" * 3. The parser did not successfully parse any part of the input.",
" *",
" * - |result === null|",
" * - |that._pos === 0|",
" * - |that._rightmostMatchFailuresExpected.length| contains at least one failure",
" *",
" * All code following this comment (including called functions) must",
" * handle these states.",
" */",
" if (result === null || this._pos !== input.length) {",
" var errorPosition = computeErrorPosition();",
" throw new this.SyntaxError(",
" buildErrorMessage(),",
" errorPosition.line,",
" errorPosition.column",
" );",
" }",
" ",
" return result;",
" },",
" ",
" /* Returns the parser source code. */",
" toSource: function() { return this._source; }",
" };",
" ",
" /* Thrown when a parser encounters a syntax error. */",
" ",
" result.SyntaxError = function(message, line, column) {",
" this.name = 'SyntaxError';",
" this.message = message;",
" this.line = line;",
" this.column = column;",
" };",
" ",
" ${parseFunctionDefinitions}",
" result.SyntaxError.prototype = Error.prototype;",
" ",
" return result;",
"})()",
@ -393,7 +632,7 @@ PEG.Grammar.Rule.prototype.compile = function() {
}
return PEG.Compiler.formatCode(
"result._parse_${name} = function(context) {",
"_parse_${name}: function(context) {",
" var cacheKey = ${name|string} + '@' + this._pos;",
" var cachedResult = this._cache[cacheKey];",
" if (cachedResult !== undefined) {",
@ -413,7 +652,7 @@ PEG.Grammar.Rule.prototype.compile = function() {
" result: ${resultVar}",
" };",
" return ${resultVar};",
"};",
"},",
{
name: this._name,
setReportMatchFailuresCode: setReportMatchFailuresCode,
@ -450,7 +689,7 @@ PEG.Grammar.Literal.prototype.compile = function(resultVar) {
"} else {",
" var ${resultVar} = null;",
" if (context.reportMatchFailures) {",
" this._matchFailed(PEG.StringUtils.quote(${value|string}));",
" this._matchFailed(this._quoteString(${value|string}));",
" }",
"}",
{

File diff suppressed because it is too large Load Diff

@ -131,7 +131,7 @@ class "character class": "[" "^"? (classCharacterRange / classCharacter)* "]" __
classCharacterRange: bracketDelimitedCharacter "-" bracketDelimitedCharacter {
if ($1.charCodeAt(0) > $3.charCodeAt(0)) {
throw new PEG.Parser.SyntaxError(
throw new this.SyntaxError(
"Invalid character range: "
+ PEG.RegExpUtils.quoteForClass($1)
+ "-"

@ -1,239 +0,0 @@
/*
* PEG.js runtime.
*
* Required by all parsers generated by PEG.js.
*/
PEG = {};
(function() {
/* ===== PEG.ArrayUtils ===== */
/* Array manipulation utility functions. */
PEG.ArrayUtils = {
  /*
   * Runs |callback| on every element of |array| from the first index to the
   * last. The array length is captured before iterating.
   */
  each: function(array, callback) {
    var total = array.length;
    var position = 0;
    while (position < total) {
      callback(array[position]);
      position++;
    }
  },

  /*
   * Builds and returns a new array in which every slot holds the result of
   * applying |callback| to the element of |array| at the same index.
   */
  map: function(array, callback) {
    var output = [];
    var total = array.length;
    var position = 0;
    while (position < total) {
      output[position] = callback(array[position]);
      position++;
    }
    return output;
  }
};
/* ===== PEG.StringUtils ===== */
/* String manipulation utility functions. */
PEG.StringUtils = {
  /*
   * Produces a double-quoted JavaScript string literal for |s|, escaping the
   * characters that are not allowed to appear literally inside one.
   */
  quote: function(s) {
    /*
     * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
     * literal except for the closing quote character, backslash, carriage
     * return, line separator, paragraph separator, and line feed. Any character
     * may appear in the form of an escape sequence.
     */
    var replacements = {
      '\\':     '\\\\',    // backslash
      '"':      '\\"',     // closing quote character
      '\r':     '\\r',     // carriage return
      '\u2028': '\\u2028', // line separator
      '\u2029': '\\u2029', // paragraph separator
      '\n':     '\\n'      // line feed
    };

    /*
     * A single scan suffices because no escape sequence introduces a
     * character that needs escaping itself.
     */
    var body = s.replace(/[\\"\r\u2028\u2029\n]/g, function(ch) {
      return replacements[ch];
    });

    return '"' + body + '"';
  }
};
/* ===== PEG.RegExpUtils ===== */
/* RegExp manipulation utility functions. */
PEG.RegExpUtils = {
  /*
   * Escapes characters inside the string so that it can be used as a list of
   * characters in a character class of a regular expression.
   *
   * Takes the raw characters as a string |s| and returns the escaped string,
   * ready to be placed between "[" and "]".
   */
  quoteForClass: function(s) {
    /* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1. */
    return s
      .replace(/\\/g, '\\\\')        // backslash (must come first)
      .replace(/\//g, '\\/')         // closing slash
      .replace(/]/g, '\\]')          // closing bracket
      /*
       * A leading caret would turn the class into a negated one; escaping
       * every caret makes the output safe regardless of position.
       */
      .replace(/\^/g, '\\^')         // caret
      .replace(/-/g, '\\-')          // dash
      .replace(/\r/g, '\\r')         // carriage return
      .replace(/\u2028/g, '\\u2028') // line separator
      .replace(/\u2029/g, '\\u2029') // paragraph separator
      .replace(/\n/g, '\\n');        // line feed
  }
};
/* ===== PEG.Parser ===== */
/* Prototype of all parsers generated by PEG.js. */
/* Constructor: remembers the name of the rule that |parse| starts from. */
PEG.Parser = function(startRule) { this._startRule = startRule; };

PEG.Parser.prototype = {
  /*
   * Records |failure| (a description of what was expected) for the current
   * position. Only failures at the rightmost position reached so far are
   * kept: moving further right discards the ones collected earlier. A
   * description that is already present is not added again.
   */
  _matchFailed: function(failure) {
    if (this._pos > this._rightmostMatchFailuresPos) {
      this._rightmostMatchFailuresPos = this._pos;
      this._rightmostMatchFailuresExpected = [];
    }

    if (this._rightmostMatchFailuresExpected.indexOf(failure) === -1) {
      this._rightmostMatchFailuresExpected.push(failure);
    }
  },

  /*
   * Parses the input with a generated parser. If the parsing is successful,
   * returns a value explicitly or implicitly specified by the grammar from
   * which the parser was generated (see |PEG.buildParser|). If the parsing is
   * unsuccessful, throws |PEG.Parser.SyntaxError| describing the error.
   */
  parse: function(input) {
    var that = this;

    /* Resets all per-parse state so the parser object can be reused. */
    function initialize() {
      that._input = input;
      that._pos = 0;
      that._rightmostMatchFailuresPos = 0;
      that._rightmostMatchFailuresExpected = [];
      that._cache = {};
    }

    /* Builds the "Expected ... but ... found." message for a failed parse. */
    function buildErrorMessage() {
      /*
       * Joins the expectation descriptions into "a, b or c". Note that the
       * array is sorted in place.
       */
      function buildExpected(failuresExpected) {
        switch (failuresExpected.length) {
          case 0:
            return "end of input";
          case 1:
            return failuresExpected[0];
          default:
            failuresExpected.sort();
            return failuresExpected.slice(0, failuresExpected.length - 1).join(", ")
              + " or "
              + failuresExpected[failuresExpected.length - 1];
        }
      }

      var expected = buildExpected(that._rightmostMatchFailuresExpected);
      var pos = Math.max(that._pos, that._rightmostMatchFailuresPos);
      var actual = pos < that._input.length
        ? PEG.StringUtils.quote(that._input.charAt(pos))
        : "end of input";

      return "Expected " + expected + " but " + actual + " found.";
    }

    /*
     * Converts the rightmost failure offset into a 1-based line and column,
     * returned as |{ line: ..., column: ... }|.
     */
    function computeErrorPosition() {
      /*
       * The first idea was to use |String.split| to break the input up to the
       * error position along newlines and derive the line and column from
       * there. However IE's |split| implementation is so broken that it was
       * enough to prevent it.
       */

      var input = that._input;
      var pos = that._rightmostMatchFailuresPos;
      var line = 1;
      var column = 1;
      var seenCR = false;

      for (var i = 0; i < pos; i++) {
        var ch = input.charAt(i);
        if (ch === "\n") {
          /*
           * A line feed right after a character handled by the next branch
           * was already counted there, so it does not start another line.
           */
          if (!seenCR) { line++; }
          column = 1;
          seenCR = false;
        } else if (ch === "\r" || ch === "\u2028" || ch === "\u2029") {
          line++;
          column = 1;
          seenCR = true;
        } else {
          column++;
          seenCR = false;
        }
      }

      return { line: line, column: column };
    }

    initialize();

    var initialContext = {
      reportMatchFailures: true
    };

    /* Rule functions are looked up by name: "_parse_" + start rule. */
    var result = this["_parse_" + this._startRule](initialContext);

    /*
     * The parser is now in one of the following three states:
     *
     * 1. The parser successfully parsed the whole input.
     *
     *    - |result !== null|
     *    - |that._pos === input.length|
     *    - |that._rightmostMatchFailuresExpected.length| may or may not contain
     *      something
     *
     * 2. The parser successfully parsed only a part of the input.
     *
     *    - |result !== null|
     *    - |that._pos < input.length|
     *    - |that._rightmostMatchFailuresExpected.length| may or may not contain
     *      something
     *
     * 3. The parser did not successfully parse any part of the input.
     *
     *    - |result === null|
     *    - |that._pos === 0|
     *    - |that._rightmostMatchFailuresExpected.length| contains at least one failure
     *
     * All code following this comment (including called functions) must
     * handle these states.
     */
    if (result === null || this._pos !== input.length) {
      var errorPosition = computeErrorPosition();
      throw new PEG.Parser.SyntaxError(
        buildErrorMessage(),
        errorPosition.line,
        errorPosition.column
      );
    }

    return result;
  },

  /*
   * Returns the parser source code. |_source| is not assigned anywhere in
   * this block; presumably the compiler sets it on the generated parser.
   */
  toSource: function() { return this._source; }
};

/* ===== PEG.Parser.SyntaxError ===== */

/*
 * Thrown when a parser encounters a syntax error. Carries the error
 * |message| plus the 1-based |line| and |column| computed by |parse|.
 */
PEG.Parser.SyntaxError = function(message, line, column) {
  this.name = "PEG.Parser.SyntaxError";
  this.message = message;
  this.line = line;
  this.column = column;
};

PEG.Parser.SyntaxError.prototype = Error.prototype;
})();

@ -33,13 +33,13 @@ global.parses = function(parser, input, expected) {
};
global.doesNotParse = function(parser, input) {
throws(function() { parser.parse(input); }, PEG.Parser.SyntaxError);
throws(function() { parser.parse(input); }, parser.SyntaxError);
};
global.doesNotParseWithMessage = function(parser, input, message) {
var exception = throws(
function() { parser.parse(input); },
PEG.Parser.SyntaxError
parser.SyntaxError
);
if (exception) {
strictEqual(exception.message, message);
@ -49,7 +49,7 @@ global.doesNotParseWithMessage = function(parser, input, message) {
global.doesNotParseWithPos = function(parser, input, line, column) {
var exception = throws(
function() { parser.parse(input); },
PEG.Parser.SyntaxError
parser.SyntaxError
);
if (exception) {
strictEqual(exception.line, line);
@ -57,6 +57,56 @@ global.doesNotParseWithPos = function(parser, input, line, column) {
}
};
/* ===== PEG.ArrayUtils ===== */

module("PEG.ArrayUtils");

test("each", function() {
  var total;
  var accumulate = function(value) { total += value; };

  /* An empty array must leave the accumulator untouched. */
  total = 0;
  PEG.ArrayUtils.each([], accumulate);
  strictEqual(total, 0);

  /* The callback sees all three elements: 1 + 2 + 3. */
  total = 0;
  PEG.ArrayUtils.each([1, 2, 3], accumulate);
  strictEqual(total, 6);
});

test("map", function() {
  var square = function(value) { return value * value; };

  deepEqual(PEG.ArrayUtils.map([], square), []);
  deepEqual(PEG.ArrayUtils.map([1, 2, 3], square), [1, 4, 9]);
});

/* ===== PEG.StringUtils ===== */

module("PEG.StringUtils");

test("quote", function() {
  /* Each entry is [input, expected quoted form]. */
  var cases = [
    ["", '""'],
    ["abcd", '"abcd"'],
    [
      "\"\\\r\u2028\u2029\n\"\\\r\u2028\u2029\n",
      '"\\\"\\\\\\r\\u2028\\u2029\\n\\\"\\\\\\r\\u2028\\u2029\\n"'
    ]
  ];

  for (var i = 0; i < cases.length; i++) {
    strictEqual(PEG.StringUtils.quote(cases[i][0]), cases[i][1]);
  }
});

/* ===== PEG.RegExpUtils ===== */

module("PEG.RegExpUtils");

test("quoteForClass", function() {
  /* Each entry is [input, expected escaped form]. */
  var cases = [
    ["", ''],
    ["abcd", 'abcd'],
    [
      "\\/]-\r\u2028\u2029\n\\/]-\r\u2028\u2029\n",
      '\\\\\\/\\]\\-\\r\\u2028\\u2029\\n\\\\\\/\\]\\-\\r\\u2028\\u2029\\n'
    ]
  ];

  for (var i = 0; i < cases.length; i++) {
    strictEqual(PEG.RegExpUtils.quoteForClass(cases[i][0]), cases[i][1]);
  }
});
/* ===== PEG.Compiler ===== */
module("PEG.Compiler");

@ -5,10 +5,8 @@
<title>PEG.js Test Suite</title>
<link rel="stylesheet" href="../vendor/qunit/qunit.css">
<script src="../vendor/qunit/qunit.js"></script>
<script src="../lib/runtime.js"></script>
<script src="../lib/compiler.js"></script>
<script src="../lib/metagrammar.js"></script>
<script src="runtime-test.js"></script>
<script src="compiler-test.js"></script>
<script src="metagrammar-test.js"></script>
</head>

@ -1,53 +0,0 @@
(function() {

/* ===== PEG.ArrayUtils ===== */

module("PEG.ArrayUtils");

test("each", function() {
  var running;
  var addToRunning = function(item) { running += item; };

  /* Nothing to iterate over, so the accumulator stays at zero. */
  running = 0;
  PEG.ArrayUtils.each([], addToRunning);
  strictEqual(running, 0);

  /* All three elements reach the callback: 1 + 2 + 3. */
  running = 0;
  PEG.ArrayUtils.each([1, 2, 3], addToRunning);
  strictEqual(running, 6);
});

test("map", function() {
  var square = function(item) { return item * item; };

  deepEqual(PEG.ArrayUtils.map([], square), []);
  deepEqual(PEG.ArrayUtils.map([1, 2, 3], square), [1, 4, 9]);
});

/* ===== PEG.StringUtils ===== */

module("PEG.StringUtils");

test("quote", function() {
  /* Pairs of [raw string, expected quoted literal]. */
  var expectations = [
    ["", '""'],
    ["abcd", '"abcd"'],
    [
      "\"\\\r\u2028\u2029\n\"\\\r\u2028\u2029\n",
      '"\\\"\\\\\\r\\u2028\\u2029\\n\\\"\\\\\\r\\u2028\\u2029\\n"'
    ]
  ];

  for (var k = 0; k < expectations.length; k++) {
    strictEqual(PEG.StringUtils.quote(expectations[k][0]), expectations[k][1]);
  }
});

/* ===== PEG.RegExpUtils ===== */

module("PEG.RegExpUtils");

test("quoteForClass", function() {
  /* Pairs of [raw string, expected escaped class content]. */
  var expectations = [
    ["", ''],
    ["abcd", 'abcd'],
    [
      "\\/]-\r\u2028\u2029\n\\/]-\r\u2028\u2029\n",
      '\\\\\\/\\]\\-\\r\\u2028\\u2029\\n\\\\\\/\\]\\-\\r\\u2028\\u2029\\n'
    ]
  ];

  for (var k = 0; k < expectations.length; k++) {
    strictEqual(PEG.RegExpUtils.quoteForClass(expectations[k][0]), expectations[k][1]);
  }
});

})();
Loading…
Cancel
Save