pegjs/test/passes-test.js

608 lines
16 KiB
JavaScript
Raw Normal View History

(function() {
module("PEG.compiler.passes");
test("reports missing referenced rules", function() {
function testGrammar(grammar) {
raises(
function() {
var ast = PEG.parser.parse(grammar);
PEG.compiler.passes.reportMissingRules(ast);
},
function(e) {
return e instanceof PEG.GrammarError
&& e.message === "Referenced rule \"missing\" does not exist.";
}
);
}
var grammars = [
'start = missing',
'start = missing / "a" / "b"',
'start = "a" / "b" / missing',
'start = missing "a" "b"',
'start = "a" "b" missing',
'start = label:missing',
'start = &missing',
'start = !missing',
'start = missing?',
'start = missing*',
'start = missing+',
'start = missing { }'
];
for (var i = 0; i < grammars.length; i++) { testGrammar(grammars[i]); }
});
test("reports left recursion", function() {
function testGrammar(grammar) {
raises(
function() {
var ast = PEG.parser.parse(grammar);
PEG.compiler.passes.reportLeftRecursion(ast);
},
function(e) {
return e instanceof PEG.GrammarError
&& e.message === "Left recursion detected for rule \"start\".";
}
);
}
var grammars = [
/* Direct */
'start = start',
'start = start / "a" / "b"',
'start = "a" / "b" / start',
'start = start "a" "b"',
'start = label:start',
'start = &start',
'start = !start',
'start = start?',
'start = start*',
'start = start+',
'start = start { }',
/* Indirect */
'start = stop; stop = start'
];
for (var i = 0; i < grammars.length; i++) { testGrammar(grammars[i]); }
});
test("removes proxy rules", function() {
function simpleGrammar(rules, startRule) {
return {
type: "grammar",
initializer: null,
rules: rules,
startRule: startRule
};
}
var proxiedRule = {
type: "rule",
name: "proxied",
displayName: null,
expression: { type: "literal", value: "a", ignoreCase: false }
};
var proxiedRuleRef = {
type: "rule_ref",
name: "proxied"
};
function simpleGrammarWithStartAndProxied(startRuleExpression) {
return simpleGrammar(
[
{
type: "rule",
name: "start",
displayName: null,
expression: startRuleExpression
},
proxiedRule
],
"start"
);
}
var cases = [
{
grammar: 'start = proxy; proxy = proxied; proxied = "a"',
ast: simpleGrammar([proxiedRule], "proxied")
},
{
grammar: 'start = proxy / "a" / "b"; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "choice",
alternatives: [
proxiedRuleRef,
{ type: "literal", value: "a", ignoreCase: false },
{ type: "literal", value: "b", ignoreCase: false }
]
})
},
{
grammar: 'start = "a" / "b" / proxy; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "choice",
alternatives: [
{ type: "literal", value: "a", ignoreCase: false },
{ type: "literal", value: "b", ignoreCase: false },
proxiedRuleRef
]
})
},
{
grammar: 'start = proxy "a" "b"; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "sequence",
elements: [
proxiedRuleRef,
{ type: "literal", value: "a", ignoreCase: false },
{ type: "literal", value: "b", ignoreCase: false }
]
})
},
{
grammar: 'start = "a" "b" proxy; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "sequence",
elements: [
{ type: "literal", value: "a", ignoreCase: false },
{ type: "literal", value: "b", ignoreCase: false },
proxiedRuleRef
]
})
},
{
grammar: 'start = label:proxy; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "labeled",
label: "label",
expression: proxiedRuleRef
})
},
{
grammar: 'start = &proxy; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "simple_and",
expression: proxiedRuleRef
})
},
{
grammar: 'start = !proxy; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "simple_not",
expression: proxiedRuleRef
})
},
{
grammar: 'start = proxy?; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "optional",
expression: proxiedRuleRef
})
},
{
grammar: 'start = proxy*; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "zero_or_more",
expression: proxiedRuleRef
})
},
{
grammar: 'start = proxy+; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "one_or_more",
expression: proxiedRuleRef
})
},
{
grammar: 'start = proxy { }; proxy = proxied; proxied = "a"',
ast: simpleGrammarWithStartAndProxied({
type: "action",
code: " ",
expression: proxiedRuleRef
})
}
];
for (var i = 0; i < cases.length; i++) {
var ast = PEG.parser.parse(cases[i].grammar);
PEG.compiler.passes.removeProxyRules(ast);
deepEqual(ast, cases[i].ast);
}
});
test("computes variable names", function() {
var leafDetails = { resultVar: "result0" },
choiceDetails = {
resultVar: "result0",
alternatives: [
{ resultVar: "result0", posVar: "pos0" },
{ resultVar: "result0", posVar: "pos0" },
{ resultVar: "result0", posVar: "pos0" }
]
},
sequenceDetails = {
resultVar: "result0",
posVar: "pos0",
elements: [
{ resultVar: "result0", posVar: "pos1" },
{ resultVar: "result1", posVar: "pos1" },
{ resultVar: "result2", posVar: "pos1" }
]
};
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
var cases = [
/* Choice */
{
grammar: 'start = &"a" / &"b" / &"c"',
resultVars: ["result0"],
posVars: ["pos0"],
details: choiceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &"a" / &"b"* / &"c"',
resultVars: ["result0", "result1"],
posVars: ["pos0"],
details: choiceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &"a" / &(&"b") / &"c"',
resultVars: ["result0"],
posVars: ["pos0", "pos1"],
details: choiceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
/* Sequence */
{
grammar: 'start = ',
resultVars: ["result0"],
posVars: ["pos0"],
details: { resultVar: "result0", posVar: "pos0" }
},
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
{
grammar: 'start = &"a" &"b" &"c"',
resultVars: ["result0", "result1", "result2"],
posVars: ["pos0", "pos1"],
details: sequenceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &"a" &"b" &"c"*',
resultVars: ["result0", "result1", "result2", "result3"],
posVars: ["pos0", "pos1"],
details: sequenceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &"a" &"b"* &"c"',
resultVars: ["result0", "result1", "result2"],
posVars: ["pos0", "pos1"],
details: sequenceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &"a" &("b"*)* &"c"',
resultVars: ["result0", "result1", "result2", "result3"],
posVars: ["pos0", "pos1"],
details: sequenceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &"a"* &"b" &"c"',
resultVars: ["result0", "result1", "result2"],
posVars: ["pos0", "pos1"],
details: sequenceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &("a"*)* &"b" &"c"',
resultVars: ["result0", "result1", "result2"],
posVars: ["pos0", "pos1"],
details: sequenceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &(("a"*)*)* &"b" &"c"',
resultVars: ["result0", "result1", "result2", "result3"],
posVars: ["pos0", "pos1"],
details: sequenceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
{
grammar: 'start = &"a" &(&"b") &"c"',
resultVars: ["result0", "result1", "result2"],
posVars: ["pos0", "pos1", "pos2"],
details: sequenceDetails
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
},
/* Others */
{
grammar: 'start = label:&"a"',
resultVars: ["result0"],
posVars: ["pos0"],
details: {
resultVar: "result0",
expression: { resultVar: "result0", posVar: "pos0" }
}
},
{
grammar: 'start = &(&"a")',
resultVars: ["result0"],
posVars: ["pos0", "pos1"],
details: {
resultVar: "result0",
posVar: "pos0",
expression: { resultVar: "result0", posVar: "pos1" }
}
},
{
grammar: 'start = !(&"a")',
resultVars: ["result0"],
posVars: ["pos0", "pos1"],
details: {
resultVar: "result0",
posVar: "pos0",
expression: { resultVar: "result0", posVar: "pos1" }
}
},
{
grammar: 'start = &{ code }',
resultVars: ["result0"],
posVars: [],
details: leafDetails
},
{
grammar: 'start = !{ code }',
resultVars: ["result0"],
posVars: [],
details: leafDetails
},
{
grammar: 'start = (&"a")?',
resultVars: ["result0"],
posVars: ["pos0"],
details: {
resultVar: "result0",
expression: { resultVar: "result0", posVar: "pos0" }
}
},
{
grammar: 'start = (&"a")*',
resultVars: ["result0", "result1"],
posVars: ["pos0"],
details: {
resultVar: "result0",
expression: { resultVar: "result1", posVar: "pos0" }
}
},
{
grammar: 'start = (&"a")+',
resultVars: ["result0", "result1"],
posVars: ["pos0"],
details: {
resultVar: "result0",
expression: { resultVar: "result1", posVar: "pos0" }
}
},
{
grammar: 'start = &"a" { code }',
resultVars: ["result0"],
posVars: ["pos0", "pos1"],
details: {
resultVar: "result0",
posVar: "pos0",
expression: { resultVar: "result0", posVar: "pos1" }
}
},
{
grammar: 'start = a',
resultVars: ["result0"],
posVars: [],
details: leafDetails
},
{
grammar: 'start = "a"',
resultVars: ["result0"],
posVars: [],
details: leafDetails
},
{
grammar: 'start = .',
resultVars: ["result0"],
posVars: [],
details: leafDetails
},
{
grammar: 'start = [a-z]',
resultVars: ["result0"],
posVars: [],
details: leafDetails
}
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
];
function checkDetails(node, details) {
for (var key in details) {
if (Object.prototype.toString.call(details[key]) === "[object Array]") {
for (var i = 0; i < details[key].length; i++) {
checkDetails(node[key][i], details[key][i]);
}
} else if (typeof details[key] === "object") {
checkDetails(node[key], details[key]);
} else {
strictEqual(node[key], details[key]);
}
}
}
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
for (var i = 0; i < cases.length; i++) {
var ast = PEG.parser.parse(cases[i].grammar);
PEG.compiler.passes.computeVarNames(ast);
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
deepEqual(ast.rules[0].resultVars, cases[i].resultVars);
deepEqual(ast.rules[0].posVars, cases[i].posVars);
checkDetails(ast.rules[0].expression, cases[i].details);
Rewrite variable handling in generated parsers Before this commit, variables for saving match results and parse positions in generated parsers were not used efficiently. Each rule basically used its own variable(s) for storing the data, with names generated sequentially during code emitting. There was no reuse of variables and a lot of unnecessary assignments between them. It is easy to see that both match results and parse positions can actually be stored on a stack that grows as the parser walks deeper in the grammar tree and shrinks as it returns. Moreover, if one creates a new stack for each rule the parser enters, its maximum depth can be computed statically from the grammar. This allows us to implement the stack not as an array, but as a set of numbered variables in each function that handles parsing of a grammar rule, avoiding potentially slow array accesses. This commit implements the idea from the previous paragraph, using separate stack for match results and for parse positions. As a result, defined variables are reused and unnecessary copying avoided. Speed implications ------------------ This change speeds up the benchmark suite execution by 2.14%. Detailed results (benchmark suite totals as reported by "jake benchmark" on Node.js 0.4.8): ----------------------------------- Test # Before After ----------------------------------- 1 129.01 kB/s 131.98 kB/s 2 129.39 kB/s 130.13 kB/s 3 128.63 kB/s 132.57 kB/s 4 127.53 kB/s 129.82 kB/s 5 127.98 kB/s 131.80 kB/s ----------------------------------- Average 128.51 kB/s 131.26 kB/s ----------------------------------- Size implications ----------------- This change makes a sample of generated parsers 8.60% smaller: Before: $ wc -c src/parser.js examples/*.js 110867 src/parser.js 13886 examples/arithmetics.js 450125 examples/css.js 632390 examples/javascript.js 61365 examples/json.js 1268633 total After: $ wc -c src/parser.js examples/*.js 99597 src/parser.js 13077 examples/arithmetics.js 399893 examples/css.js 592044 examples/javascript.js 54797 examples/json.js 1159408 total
2011-08-22 19:48:43 +02:00
}
});
test("computes params", function() {
function extractNode(node) { return node; }
function extractExpression(node) { return node.expression; }
var cases = [
/* Bacics */
{
grammar: 'start = a:"a" { }',
extractor: extractNode,
params: { a: "result0" }
},
{
grammar: 'start = a:"a" &{ }',
extractor: function(node) { return node.elements[1]; },
params: { a: "result0" }
},
{
grammar: 'start = a:"a" !{ }',
extractor: function(node) { return node.elements[1]; },
params: { a: "result0" }
},
/* Recursive walk */
{
grammar: 'start = a:"a" { } / "b" / "c"',
extractor: function(node) { return node.alternatives[0]; },
params: { a: "result0" }
},
{
grammar: 'start = "a" / "b" / c:"c" { }',
extractor: function(node) { return node.alternatives[2]; },
params: { c: "result0" }
},
{
grammar: 'start = (a:"a" { }) "b" "c"',
extractor: function(node) { return node.elements[0]; },
params: { a: "result0" }
},
{
grammar: 'start = "a" "b" (c:"c" { })',
extractor: function(node) { return node.elements[2]; },
params: { c: "result2" }
},
{
grammar: 'start = a:(b:"b" { })',
extractor: extractExpression,
params: { b: "result0" }
},
{
grammar: 'start = &(a:"a" { })',
extractor: extractExpression,
params: { a: "result0" }
},
{
grammar: 'start = !(a:"a" { })',
extractor: extractExpression,
params: { a: "result0" }
},
{
grammar: 'start = (a:"a" { })?',
extractor: extractExpression,
params: { a: "result0" }
},
{
grammar: 'start = (a:"a" { })*',
extractor: extractExpression,
params: { a: "result1" }
},
{
grammar: 'start = (a:"a" { })+',
extractor: extractExpression,
params: { a: "result1" }
},
{
grammar: 'start = (a:"a" { }) { }',
extractor: extractExpression,
params: { a: "result0" }
},
/* Scoping */
{
grammar: 'start = (a:"a" / b:"b" / c:"c") { }',
extractor: extractNode,
params: { }
},
{
grammar: 'start = a:(b:"b") { }',
extractor: extractNode,
params: { a: "result0" }
},
{
grammar: 'start = &(a:"a") { }',
extractor: extractNode,
params: { }
},
{
grammar: 'start = !(a:"a") { }',
extractor: extractNode,
params: { }
},
{
grammar: 'start = (a:"a")? { }',
extractor: extractNode,
params: { }
},
{
grammar: 'start = (a:"a")* { }',
extractor: extractNode,
params: { }
},
{
grammar: 'start = (a:"a")+ { }',
extractor: extractNode,
params: { }
},
{
grammar: 'start = (a:"a" { }) { }',
extractor: extractNode,
params: { }
},
/* Sequences */
{
grammar: 'start = a:"a" b:"b" c:"c" { }',
extractor: extractNode,
params: { a: "result0[0]", b: "result0[1]", c: "result0[2]" }
},
{
grammar: 'start = a:"a" (b:"b" c:"c" d:"d") e:"e"{ }',
extractor: extractNode,
params: {
a: "result0[0]",
b: "result0[1][0]",
c: "result0[1][1]",
d: "result0[1][2]",
e: "result0[2]"
}
},
/*
* Regression tests for a bug where e.g. resultVar names like |result10|
* were incorrectly treated as names derived from |result1|, leading to
* incorrect substitution.
*/
{
grammar: 'start = ("a" "b" "c" "d" "e" "f" "g" "h" "i" j:"j" { })*',
extractor: extractExpression,
params: { j: "result1[9]" } // Buggy code put "result1[0]0" here.
}
];
for (var i = 0; i < cases.length; i++) {
var ast = PEG.parser.parse(cases[i].grammar);
PEG.compiler.passes.computeVarNames(ast);
PEG.compiler.passes.computeParams(ast);
deepEqual(
cases[i].extractor(ast.rules[0].expression).params,
cases[i].params
);
}
});
})();