Optimize location info computation

Before this commit, position details (line and column) weren't computed
efficiently from the current parse position. There was a cache, but it
held only one item and was rarely hit in practice. This resulted in
frequent rescanning of the whole input when the |location| function was
used in various places in a grammar.

This commit extends the cache to remember position details for any
position they were ever computed for. In case of a cache miss, the cache
is searched for a value corresponding to the nearest lower position,
which is then used to compute position info for the desired position
(which is then cached). The whole input never needs to be rescanned.

No items are ever evicted from the cache. I think this is fine, as the
cache holds at most one entry per input position, so its size is bounded
by the length of the input. If this becomes a problem I can introduce
some eviction logic later.

The performance impact of this change is significant. As the benchmark
suite doesn't contain any grammar with |location| calls, I used a small
ad-hoc benchmark script that measures the time to parse the grammar of
PEG.js itself (which contains |location| calls):

  // Ad-hoc benchmark: time a single parse of the PEG.js grammar source
  // using the parser module under test.
  var fs     = require("fs"),
      parser = require("./lib/parser");

  // Read the whole grammar up front so file I/O is not part of the
  // measured interval.
  var grammar = fs.readFileSync("./src/parser.pegjs", "utf-8"),
      startTime, endTime;

  // Wall-clock timestamps in milliseconds taken around one parse call.
  startTime = (new Date()).getTime();
  parser.parse(grammar);
  endTime = (new Date()).getTime();

  // Print the elapsed time in milliseconds.
  console.log(endTime - startTime);

The measured time went from ~293 ms to ~54 ms on my machine.

Fixes #337.
This commit is contained in:
David Majda 2015-05-29 14:43:54 -07:00
parent 29bb921994
commit f2200e48af
2 changed files with 46 additions and 38 deletions

View file

@ -887,8 +887,7 @@ function generateJavascript(ast, options) {
'',
' peg$currPos = 0,',
' peg$savedPos = 0,',
' peg$cachedPos = 0,',
' peg$cachedPosDetails = { line: 1, column: 1, seenCR: false },',
' peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }],',
' peg$maxFailPos = 0,',
' peg$maxFailExpected = [],',
' peg$silentFails = 0,', // 0 = report failures, > 0 = silence failures
@ -979,10 +978,25 @@ function generateJavascript(ast, options) {
' }',
'',
' function peg$computePosDetails(pos) {',
' function advance(details, startPos, endPos) {',
' var p, ch;',
' var details = peg$posDetailsCache[pos],',
' p, ch;',
'',
' for (p = startPos; p < endPos; p++) {',
' if (details) {',
' return details;',
' } else {',
' p = pos - 1;',
' while (!peg$posDetailsCache[p]) {',
' p--;',
' }',
'',
' details = peg$posDetailsCache[p];',
' details = {',
' line: details.line,',
' column: details.column,',
' seenCR: details.seenCR',
' };',
'',
' while (p < pos) {',
' ch = input.charAt(p);',
' if (ch === "\\n") {',
' if (!details.seenCR) { details.line++; }',
@ -996,23 +1010,13 @@ function generateJavascript(ast, options) {
' details.column++;',
' details.seenCR = false;',
' }',
' }',
'',
' p++',
' }',
'',
' if (peg$cachedPos !== pos) {',
' if (peg$cachedPos > pos) {',
' peg$cachedPos = 0;',
' peg$cachedPosDetails = { line: 1, column: 1, seenCR: false };',
' peg$posDetailsCache[pos] = details;',
' return details',
' }',
' advance(peg$cachedPosDetails, peg$cachedPos, pos);',
' peg$cachedPos = pos;',
' }',
'',
' return {',
' line: peg$cachedPosDetails.line,',
' column: peg$cachedPosDetails.column,',
' seenCR: peg$cachedPosDetails.seenCR',
' };',
' }',
'',
' function peg$computeLocation(startPos, endPos) {',

View file

@ -375,8 +375,7 @@ module.exports = (function() {
peg$currPos = 0,
peg$savedPos = 0,
peg$cachedPos = 0,
peg$cachedPosDetails = { line: 1, column: 1, seenCR: false },
peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }],
peg$maxFailPos = 0,
peg$maxFailExpected = [],
peg$silentFails = 0,
@ -418,10 +417,25 @@ module.exports = (function() {
}
function peg$computePosDetails(pos) {
function advance(details, startPos, endPos) {
var p, ch;
var details = peg$posDetailsCache[pos],
p, ch;
for (p = startPos; p < endPos; p++) {
if (details) {
return details;
} else {
p = pos - 1;
while (!peg$posDetailsCache[p]) {
p--;
}
details = peg$posDetailsCache[p];
details = {
line: details.line,
column: details.column,
seenCR: details.seenCR
};
while (p < pos) {
ch = input.charAt(p);
if (ch === "\n") {
if (!details.seenCR) { details.line++; }
@ -435,23 +449,13 @@ module.exports = (function() {
details.column++;
details.seenCR = false;
}
}
p++
}
if (peg$cachedPos !== pos) {
if (peg$cachedPos > pos) {
peg$cachedPos = 0;
peg$cachedPosDetails = { line: 1, column: 1, seenCR: false };
peg$posDetailsCache[pos] = details;
return details
}
advance(peg$cachedPosDetails, peg$cachedPos, pos);
peg$cachedPos = pos;
}
return {
line: peg$cachedPosDetails.line,
column: peg$cachedPosDetails.column,
seenCR: peg$cachedPosDetails.seenCR
};
}
function peg$computeLocation(startPos, endPos) {