pegjs/test/passes-test.js
David Majda d123cf0eda Rewrite variable handling in generated parsers
Before this commit, variables for saving match results and parse
positions in generated parsers were not used efficiently. Each rule
basically used its own variable(s) for storing the data, with names
generated sequentially during code emitting. There was no reuse of
variables and a lot of unnecessary assignments between them.

It is easy to see that both match results and parse positions can
actually be stored on a stack that grows as the parser walks deeper in
the grammar tree and shrinks as it returns. Moreover, if one creates a
new stack for each rule the parser enters, its maximum depth can be
computed statically from the grammar. This allows us to implement the
stack not as an array, but as a set of numbered variables in each
function that handles parsing of a grammar rule, avoiding potentially
slow array accesses.

This commit implements the idea from the previous paragraph, using
separate stacks for match results and for parse positions. As a result,
defined variables are reused and unnecessary copying is avoided.

Speed implications
------------------

This change speeds up the benchmark suite execution by 2.14%.

Detailed results (benchmark suite totals as reported by "jake benchmark"
on Node.js 0.4.8):

-----------------------------------
 Test #      Before        After
-----------------------------------
      1   129.01 kB/s   131.98 kB/s
      2   129.39 kB/s   130.13 kB/s
      3   128.63 kB/s   132.57 kB/s
      4   127.53 kB/s   129.82 kB/s
      5   127.98 kB/s   131.80 kB/s
-----------------------------------
Average   128.51 kB/s   131.26 kB/s
-----------------------------------

Size implications
-----------------

This change makes a sample of generated parsers 8.60% smaller:

Before:

  $ wc -c src/parser.js examples/*.js
   110867 src/parser.js
    13886 examples/arithmetics.js
   450125 examples/css.js
   632390 examples/javascript.js
    61365 examples/json.js
  1268633 total

After:

  $ wc -c src/parser.js examples/*.js
    99597 src/parser.js
    13077 examples/arithmetics.js
   399893 examples/css.js
   592044 examples/javascript.js
    54797 examples/json.js
  1159408 total
2011-09-01 20:15:54 +02:00

237 lines
6.5 KiB
JavaScript

(function() {
module("PEG.compiler.passes");
/*
 * Feeds grammars containing a rule |proxy| that only forwards to |proxied|
 * through the |proxyRules| pass and compares the result with an AST in which
 * |proxy| is gone and every reference to it names |proxied| instead. Each case
 * puts the reference inside a different kind of expression.
 */
test("removes proxy rules", function() {
  /* Builds a literal node matching |value|. */
  function literal(value) {
    return { type: "literal", value: value };
  }

  /* Builds a grammar node from a rule table and the name of its start rule. */
  function grammarNode(rules, startRule) {
    return {
      type:        "grammar",
      initializer: null,
      rules:       rules,
      startRule:   startRule
    };
  }

  /* The |proxied| rule as it appears in every expected AST. */
  var targetRule = {
    type:        "rule",
    name:        "proxied",
    displayName: null,
    expression:  literal("a")
  };

  /* The node expected wherever the grammar source references |proxy|. */
  var targetRef = { type: "rule_ref", name: "proxied" };

  /* Builds a node of the given |type| whose operand is the expected reference. */
  function around(type) {
    return { type: type, expression: targetRef };
  }

  /*
   * Pairs |grammar| with the AST expected when the |start| rule is kept and
   * its expression is |startExpression|.
   */
  function keepsStart(grammar, startExpression) {
    return {
      grammar: grammar,
      ast:     grammarNode(
        {
          start: {
            type:        "rule",
            name:        "start",
            displayName: null,
            expression:  startExpression
          },
          proxied: targetRule
        },
        "start"
      )
    };
  }

  var cases = [
    /*
     * |start| only forwards to |proxy|: the expected AST holds nothing but
     * |proxied|, which is also the start rule.
     */
    {
      grammar: 'start = proxy; proxy = proxied; proxied = "a"',
      ast:     grammarNode({ proxied: targetRule }, "proxied")
    },

    /* Choice */
    keepsStart('start = proxy / "a" / "b"; proxy = proxied; proxied = "a"', {
      type:         "choice",
      alternatives: [targetRef, literal("a"), literal("b")]
    }),
    keepsStart('start = "a" / "b" / proxy; proxy = proxied; proxied = "a"', {
      type:         "choice",
      alternatives: [literal("a"), literal("b"), targetRef]
    }),

    /* Sequence */
    keepsStart('start = proxy "a" "b"; proxy = proxied; proxied = "a"', {
      type:     "sequence",
      elements: [targetRef, literal("a"), literal("b")]
    }),
    keepsStart('start = "a" "b" proxy; proxy = proxied; proxied = "a"', {
      type:     "sequence",
      elements: [literal("a"), literal("b"), targetRef]
    }),

    /* Others */
    keepsStart('start = label:proxy; proxy = proxied; proxied = "a"', {
      type:       "labeled",
      label:      "label",
      expression: targetRef
    }),
    keepsStart(
      'start = &proxy; proxy = proxied; proxied = "a"',
      around("simple_and")
    ),
    keepsStart(
      'start = !proxy; proxy = proxied; proxied = "a"',
      around("simple_not")
    ),
    keepsStart(
      'start = proxy?; proxy = proxied; proxied = "a"',
      around("optional")
    ),
    keepsStart(
      'start = proxy*; proxy = proxied; proxied = "a"',
      around("zero_or_more")
    ),
    keepsStart(
      'start = proxy+; proxy = proxied; proxied = "a"',
      around("one_or_more")
    ),
    keepsStart('start = proxy { }; proxy = proxied; proxied = "a"', {
      type:       "action",
      code:       " ",
      expression: targetRef
    })
  ];

  for (var idx = 0; idx < cases.length; idx++) {
    var current = cases[idx];
    var parsed = PEG.parser.parse(current.grammar);

    deepEqual(PEG.compiler.passes.proxyRules(parsed), current.ast);
  }
});
/*
 * Runs the |stackDepths| pass over one-rule grammars and checks the
 * |resultStackDepth| and |posStackDepth| values found on the |start| rule
 * afterwards. Every assertion carries a message naming the checked depth and
 * the grammar, so a failing case can be identified from the test report.
 */
test("computes stack depths", function() {
  var cases = [
    /* Choice */
    { grammar: 'start = "a" / "b" / "c"',    resultStackDepth: 1, posStackDepth: 1 },
    { grammar: 'start = "a" / "b"* / "c"',   resultStackDepth: 2, posStackDepth: 1 },
    { grammar: 'start = "a" / &"b" / "c"',   resultStackDepth: 1, posStackDepth: 2 },

    /* Sequence */
    { grammar: 'start = "a" "b" "c"',        resultStackDepth: 4, posStackDepth: 2 },
    { grammar: 'start = "a" "b" "c"*',       resultStackDepth: 5, posStackDepth: 2 },
    { grammar: 'start = "a" "b"* "c"',       resultStackDepth: 4, posStackDepth: 2 },
    { grammar: 'start = "a" ("b"*)* "c"',    resultStackDepth: 5, posStackDepth: 2 },
    { grammar: 'start = "a"* "b" "c"',       resultStackDepth: 4, posStackDepth: 2 },
    { grammar: 'start = ("a"*)* "b" "c"',    resultStackDepth: 4, posStackDepth: 2 },
    { grammar: 'start = (("a"*)*)* "b" "c"', resultStackDepth: 5, posStackDepth: 2 },
    { grammar: 'start = "a" &"b" "c"',       resultStackDepth: 4, posStackDepth: 3 },

    /* Others */
    { grammar: 'start = label:"a"',          resultStackDepth: 1, posStackDepth: 1 },
    { grammar: 'start = &"a"',               resultStackDepth: 1, posStackDepth: 2 },
    { grammar: 'start = !"a"',               resultStackDepth: 1, posStackDepth: 2 },
    { grammar: 'start = &{ code }',          resultStackDepth: 1, posStackDepth: 1 },
    { grammar: 'start = !{ code }',          resultStackDepth: 1, posStackDepth: 1 },
    { grammar: 'start = "a"?',               resultStackDepth: 1, posStackDepth: 1 },
    { grammar: 'start = "a"*',               resultStackDepth: 2, posStackDepth: 1 },
    { grammar: 'start = "a"+',               resultStackDepth: 2, posStackDepth: 1 },
    { grammar: 'start = "a" { code }',       resultStackDepth: 1, posStackDepth: 2 },
    { grammar: 'start = a',                  resultStackDepth: 1, posStackDepth: 1 },
    { grammar: 'start = "a"',                resultStackDepth: 1, posStackDepth: 1 },
    { grammar: 'start = .',                  resultStackDepth: 1, posStackDepth: 1 },
    { grammar: 'start = [a-z]',              resultStackDepth: 1, posStackDepth: 1 }
  ];

  for (var i = 0; i < cases.length; i++) {
    var testCase = cases[i];
    var ast = PEG.parser.parse(testCase.grammar);

    /* The return value is not used; the depths are read back from |ast|. */
    PEG.compiler.passes.stackDepths(ast);

    var startRule = ast.rules["start"];

    /*
     * The compared values are bare numbers, so without a message a failure
     * would not reveal which grammar or which of the two depths is wrong.
     */
    deepEqual(
      startRule.resultStackDepth,
      testCase.resultStackDepth,
      "resultStackDepth for grammar: " + testCase.grammar
    );
    deepEqual(
      startRule.posStackDepth,
      testCase.posStackDepth,
      "posStackDepth for grammar: " + testCase.grammar
    );
  }
});
})();