Optimize location info computation

Before this commit, position details (line and column) weren't computed
efficiently from the current parse position. There was a cache but it
held only one item and it was rarely hit in practice. This resulted in
frequent rescanning of the whole input when the |location| function was
used in various places in a grammar.

This commit extends the cache to remember position details for any
position they were ever computed for. In case of a cache miss, the cache
is searched for the entry at the nearest lower position. Position info
for the desired position is then computed by scanning only the input
between these two positions, and the result is cached. The whole input
never needs to be rescanned.

No items are ever evicted from the cache. I think this is fine as the
cache holds at most one entry per position, so the max number of entries
is bounded by the length of the input. If this becomes a problem, I can
introduce some eviction logic later.

The performance impact of this change is significant. As the benchmark
suite doesn't contain any grammar with |location| calls, I just used a
little ad-hoc benchmark script which measured the time to parse the
grammar of PEG.js itself (which contains |location| calls):

  // Ad-hoc benchmark: time a single parse of the grammar file using the
  // parser module loaded from ./lib/parser.
  var fs     = require("fs"),
      parser = require("./lib/parser");

  // Read the grammar into a string up front so that file I/O is not part
  // of the measured interval.
  var grammar = fs.readFileSync("./src/parser.pegjs", "utf-8"),
      startTime, endTime;

  // Take millisecond timestamps immediately before and after the parse.
  startTime = (new Date()).getTime();
  parser.parse(grammar);
  endTime = (new Date()).getTime();

  // Print the elapsed time in milliseconds.
  console.log(endTime - startTime);

The measured time went from ~293 ms to ~54 ms on my machine.

Fixes #337.
redux
David Majda 9 years ago
parent 29bb921994
commit f2200e48af

@ -887,8 +887,7 @@ function generateJavascript(ast, options) {
'',
' peg$currPos = 0,',
' peg$savedPos = 0,',
' peg$cachedPos = 0,',
' peg$cachedPosDetails = { line: 1, column: 1, seenCR: false },',
' peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }],',
' peg$maxFailPos = 0,',
' peg$maxFailExpected = [],',
' peg$silentFails = 0,', // 0 = report failures, > 0 = silence failures
@ -979,10 +978,25 @@ function generateJavascript(ast, options) {
' }',
'',
' function peg$computePosDetails(pos) {',
' function advance(details, startPos, endPos) {',
' var p, ch;',
' var details = peg$posDetailsCache[pos],',
' p, ch;',
'',
' for (p = startPos; p < endPos; p++) {',
' if (details) {',
' return details;',
' } else {',
' p = pos - 1;',
' while (!peg$posDetailsCache[p]) {',
' p--;',
' }',
'',
' details = peg$posDetailsCache[p];',
' details = {',
' line: details.line,',
' column: details.column,',
' seenCR: details.seenCR',
' };',
'',
' while (p < pos) {',
' ch = input.charAt(p);',
' if (ch === "\\n") {',
' if (!details.seenCR) { details.line++; }',
@ -996,23 +1010,13 @@ function generateJavascript(ast, options) {
' details.column++;',
' details.seenCR = false;',
' }',
' }',
' }',
'',
' if (peg$cachedPos !== pos) {',
' if (peg$cachedPos > pos) {',
' peg$cachedPos = 0;',
' peg$cachedPosDetails = { line: 1, column: 1, seenCR: false };',
' p++',
' }',
' advance(peg$cachedPosDetails, peg$cachedPos, pos);',
' peg$cachedPos = pos;',
' }',
'',
' return {',
' line: peg$cachedPosDetails.line,',
' column: peg$cachedPosDetails.column,',
' seenCR: peg$cachedPosDetails.seenCR',
' };',
' peg$posDetailsCache[pos] = details;',
' return details',
' }',
' }',
'',
' function peg$computeLocation(startPos, endPos) {',

@ -375,8 +375,7 @@ module.exports = (function() {
peg$currPos = 0,
peg$savedPos = 0,
peg$cachedPos = 0,
peg$cachedPosDetails = { line: 1, column: 1, seenCR: false },
peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }],
peg$maxFailPos = 0,
peg$maxFailExpected = [],
peg$silentFails = 0,
@ -418,10 +417,25 @@ module.exports = (function() {
}
function peg$computePosDetails(pos) {
function advance(details, startPos, endPos) {
var p, ch;
var details = peg$posDetailsCache[pos],
p, ch;
for (p = startPos; p < endPos; p++) {
if (details) {
return details;
} else {
p = pos - 1;
while (!peg$posDetailsCache[p]) {
p--;
}
details = peg$posDetailsCache[p];
details = {
line: details.line,
column: details.column,
seenCR: details.seenCR
};
while (p < pos) {
ch = input.charAt(p);
if (ch === "\n") {
if (!details.seenCR) { details.line++; }
@ -435,23 +449,13 @@ module.exports = (function() {
details.column++;
details.seenCR = false;
}
}
}
if (peg$cachedPos !== pos) {
if (peg$cachedPos > pos) {
peg$cachedPos = 0;
peg$cachedPosDetails = { line: 1, column: 1, seenCR: false };
p++
}
advance(peg$cachedPosDetails, peg$cachedPos, pos);
peg$cachedPos = pos;
}
return {
line: peg$cachedPosDetails.line,
column: peg$cachedPosDetails.column,
seenCR: peg$cachedPosDetails.seenCR
};
peg$posDetailsCache[pos] = details;
return details
}
}
function peg$computeLocation(startPos, endPos) {

Loading…
Cancel
Save