pegjs/src/utils.js

210 lines
5.3 KiB
JavaScript
Raw Normal View History

Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
/* Like Python's |range|, but without |step|. */
function range(start, stop) {
if (stop === undefined) {
stop = start;
start = 0;
}
var result = new Array(Math.max(0, stop - start));
for (var i = 0, j = start; j < stop; i++, j++) {
result[i] = j;
}
return result;
}
function find(array, callback) {
var length = array.length;
for (var i = 0; i < length; i++) {
if (callback(array[i])) {
return array[i];
}
}
}
function contains(array, value) {
/*
* Stupid IE does not have Array.prototype.indexOf, otherwise this function
* would be a one-liner.
*/
var length = array.length;
for (var i = 0; i < length; i++) {
if (array[i] === value) {
return true;
}
}
return false;
}
function each(array, callback) {
var length = array.length;
for (var i = 0; i < length; i++) {
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
callback(array[i], i);
}
}
function map(array, callback) {
var result = [];
var length = array.length;
for (var i = 0; i < length; i++) {
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
result[i] = callback(array[i], i);
}
return result;
}
function pluck(array, key) {
return map(array, function (e) { return e[key]; });
}
function keys(object) {
var result = [];
for (var key in object) {
result.push(key);
}
return result;
}
function values(object) {
var result = [];
for (var key in object) {
result.push(object[key]);
}
return result;
}
function clone(object) {
var result = {};
for (var key in object) {
result[key] = object[key];
}
return result;
}
function defaults(object, defaults) {
for (var key in defaults) {
if (object[key] === undefined) {
object[key] = defaults[key];
}
}
}
/*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
function subclass(child, parent) {
function ctor() { this.constructor = child; }
ctor.prototype = parent.prototype;
child.prototype = new ctor();
}
/*
* Returns a string padded on the left to a desired length with a character.
*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
function padLeft(input, padding, length) {
var result = input;
var padLength = length - input.length;
for (var i = 0; i < padLength; i++) {
result = padding + result;
}
return result;
}
/*
* Returns an escape sequence for given character. Uses \x for characters <=
* 0xFF to save space, \u for the rest.
*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
function escape(ch) {
var charCode = ch.charCodeAt(0);
var escapeChar;
var length;
2011-01-26 14:10:00 +01:00
if (charCode <= 0xFF) {
escapeChar = 'x';
length = 2;
} else {
escapeChar = 'u';
length = 4;
}
return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
}
/*
* Surrounds the string with quotes and escapes characters inside so that the
* result is a valid JavaScript string.
*
* The code needs to be in sync with the code template in the compilation
* function for "action" nodes.
*/
function quote(s) {
/*
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a string
* literal except for the closing quote character, backslash, carriage return,
* line separator, paragraph separator, and line feed. Any character may
* appear in the form of an escape sequence.
*
* For portability, we also escape escape all control and non-ASCII
* characters. Note that "\0" and "\v" escape sequences are not used because
* JSHint does not like the first and IE the second.
*/
return '"' + s
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // closing quote character
.replace(/\x08/g, '\\b') // backspace
.replace(/\t/g, '\\t') // horizontal tab
.replace(/\n/g, '\\n') // line feed
.replace(/\f/g, '\\f') // form feed
.replace(/\r/g, '\\r') // carriage return
.replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, escape)
+ '"';
}
/*
* Escapes characters inside the string so that it can be used as a list of
* characters in a character class of a regular expression.
*/
function quoteForRegexpClass(s) {
/*
* Based on ECMA-262, 5th ed., 7.8.5 & 15.10.1.
*
* For portability, we also escape escape all control and non-ASCII
* characters.
*/
return s
.replace(/\\/g, '\\\\') // backslash
.replace(/\//g, '\\/') // closing slash
.replace(/\]/g, '\\]') // closing bracket
.replace(/-/g, '\\-') // dash
.replace(/\0/g, '\\0') // null
.replace(/\t/g, '\\t') // horizontal tab
.replace(/\n/g, '\\n') // line feed
.replace(/\v/g, '\\x0B') // vertical tab
.replace(/\f/g, '\\f') // form feed
.replace(/\r/g, '\\r') // carriage return
.replace(/[\x01-\x08\x0E-\x1F\x80-\uFFFF]/g, escape);
}
2010-08-17 20:34:14 +02:00
/*
* Builds a node visitor -- a function which takes a node and any number of
* other parameters, calls an appropriate function according to the node type,
* passes it all its parameters and returns its value. The functions for various
* node types are passed in a parameter to |buildNodeVisitor| as a hash.
*/
function buildNodeVisitor(functions) {
return function(node) {
return functions[node.type].apply(null, arguments);
};
2010-08-17 20:34:14 +02:00
}
function findRuleByName(ast, name) {
return find(ast.rules, function(r) { return r.name === name; });
}