You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

161 lines
4.8 KiB
JavaScript

/*
* Optimalization passes made on the grammar AST before compilation. Each pass
* is a function that is passed the AST and returns a new AST. The AST can be
* modified in-place by the pass. The order in which the passes are run is
* specified in |PEG.compiler.compile| and should be the same as the order of
* definitions here.
*/
PEG.compiler.passes = {
/*
* Removes proxy rules -- that is, rules that only delegate to other rule.
*/
removeProxyRules: function(ast) {
function isProxyRule(node) {
return node.type === "rule" && node.expression.type === "rule_ref";
}
function replaceRuleRefs(ast, from, to) {
function nop() {}
function replaceInExpression(node, from, to) {
replace(node.expression, from, to);
}
function replaceInSubnodes(propertyName) {
return function(node, from, to) {
each(node[propertyName], function(subnode) {
replace(subnode, from, to);
});
};
}
var replace = buildNodeVisitor({
grammar:
function(node, from, to) {
for (var name in node.rules) {
replace(node.rules[name], from, to);
}
},
rule: replaceInExpression,
choice: replaceInSubnodes("alternatives"),
sequence: replaceInSubnodes("elements"),
labeled: replaceInExpression,
simple_and: replaceInExpression,
simple_not: replaceInExpression,
semantic_and: nop,
semantic_not: nop,
optional: replaceInExpression,
zero_or_more: replaceInExpression,
one_or_more: replaceInExpression,
action: replaceInExpression,
rule_ref:
function(node, from, to) {
if (node.name === from) {
node.name = to;
}
},
literal: nop,
any: nop,
"class": nop
});
replace(ast, from, to);
}
for (var name in ast.rules) {
if (isProxyRule(ast.rules[name])) {
replaceRuleRefs(ast, ast.rules[name].name, ast.rules[name].expression.name);
if (name === ast.startRule) {
ast.startRule = ast.rules[name].expression.name;
}
delete ast.rules[name];
}
}
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
13 years ago
return ast;
},
/*
* Adds |resultStackDepth| and |posStackDepth| properties to each AST node.
* These properties specify how many positions on the result or position stack
* code generated by the emitter for the node will use. This information is
* used to declare varibles holding the stack data in the generated code.
*/
computeStackDepths: function(ast) {
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
13 years ago
function computeZeroes(node) {
node.resultStackDepth = 0;
node.posStackDepth = 0;
}
function computeFromExpression(resultStackDelta, posStackDelta) {
return function(node) {
compute(node.expression);
node.resultStackDepth = node.expression.resultStackDepth + resultStackDelta;
node.posStackDepth = node.expression.posStackDepth + posStackDelta;
};
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
13 years ago
}
var compute = buildNodeVisitor({
grammar:
function(node) {
for (var name in node.rules) {
compute(node.rules[name]);
}
},
rule: computeFromExpression(1, 0),
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
13 years ago
choice:
function(node) {
each(node.alternatives, compute);
node.resultStackDepth = Math.max.apply(
null,
map(node.alternatives, function(e) { return e.resultStackDepth; })
);
node.posStackDepth = Math.max.apply(
null,
map(node.alternatives, function(e) { return e.posStackDepth; })
);
},
sequence:
function(node) {
each(node.elements, compute);
node.resultStackDepth = node.elements.length > 0
? Math.max.apply(
null,
map(node.elements, function(e, i) { return i + e.resultStackDepth; })
)
: 0;
node.posStackDepth = node.elements.length > 0
? 1 + Math.max.apply(
null,
map(node.elements, function(e) { return e.posStackDepth; })
)
: 1;
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
13 years ago
},
labeled: computeFromExpression(0, 0),
simple_and: computeFromExpression(0, 1),
simple_not: computeFromExpression(0, 1),
semantic_and: computeZeroes,
semantic_not: computeZeroes,
optional: computeFromExpression(0, 0),
zero_or_more: computeFromExpression(1, 0),
one_or_more: computeFromExpression(1, 0),
action: computeFromExpression(0, 1),
rule_ref: computeZeroes,
literal: computeZeroes,
any: computeZeroes,
"class": computeZeroes
});
compute(ast);
return ast;
}
};