From f2200e48af57bc8e90342e62e9389c9ca3e4f91d Mon Sep 17 00:00:00 2001
From: David Majda
Date: Fri, 29 May 2015 14:43:54 -0700
Subject: [PATCH] Optimize location info computation

Before this commit, position details (line and column) weren't computed
efficiently from the current parse position. There was a cache, but it
held only one item and it was rarely hit in practice. This resulted in
frequent rescanning of the whole input when the |location| function was
used in various places in a grammar.

This commit extends the cache to remember position details for any
position they were ever computed for. In case of a cache miss, the cache
is searched for a value corresponding to the nearest lower position,
which is then used to compute position info for the desired position
(which is then cached). The whole input never needs to be rescanned.

No items are ever evicted from the cache. I think this is fine, as the
max number of entries is the length of the input. If this becomes a
problem, I can introduce some eviction logic later.

The performance impact of this change is significant. As the benchmark
suite doesn't contain any grammar with |location| calls, I just used a
little ad-hoc benchmark script which measured the time to parse the
grammar of PEG.js itself (which contains |location| calls):

  var fs = require("fs"),
      parser = require("./lib/parser");

  var grammar = fs.readFileSync("./src/parser.pegjs", "utf-8"),
      startTime, endTime;

  startTime = (new Date()).getTime();
  parser.parse(grammar);
  endTime = (new Date()).getTime();

  console.log(endTime - startTime);

The measured time went from ~293 ms to ~54 ms on my machine.

Fixes #337.
--- lib/compiler/passes/generate-javascript.js | 42 ++++++++++++---------- lib/parser.js | 42 ++++++++++++---------- 2 files changed, 46 insertions(+), 38 deletions(-) diff --git a/lib/compiler/passes/generate-javascript.js b/lib/compiler/passes/generate-javascript.js index fb159e7..10b0f77 100644 --- a/lib/compiler/passes/generate-javascript.js +++ b/lib/compiler/passes/generate-javascript.js @@ -887,8 +887,7 @@ function generateJavascript(ast, options) { '', ' peg$currPos = 0,', ' peg$savedPos = 0,', - ' peg$cachedPos = 0,', - ' peg$cachedPosDetails = { line: 1, column: 1, seenCR: false },', + ' peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }],', ' peg$maxFailPos = 0,', ' peg$maxFailExpected = [],', ' peg$silentFails = 0,', // 0 = report failures, > 0 = silence failures @@ -979,10 +978,25 @@ function generateJavascript(ast, options) { ' }', '', ' function peg$computePosDetails(pos) {', - ' function advance(details, startPos, endPos) {', - ' var p, ch;', + ' var details = peg$posDetailsCache[pos],', + ' p, ch;', '', - ' for (p = startPos; p < endPos; p++) {', + ' if (details) {', + ' return details;', + ' } else {', + ' p = pos - 1;', + ' while (!peg$posDetailsCache[p]) {', + ' p--;', + ' }', + '', + ' details = peg$posDetailsCache[p];', + ' details = {', + ' line: details.line,', + ' column: details.column,', + ' seenCR: details.seenCR', + ' };', + '', + ' while (p < pos) {', ' ch = input.charAt(p);', ' if (ch === "\\n") {', ' if (!details.seenCR) { details.line++; }', @@ -996,23 +1010,13 @@ function generateJavascript(ast, options) { ' details.column++;', ' details.seenCR = false;', ' }', - ' }', - ' }', '', - ' if (peg$cachedPos !== pos) {', - ' if (peg$cachedPos > pos) {', - ' peg$cachedPos = 0;', - ' peg$cachedPosDetails = { line: 1, column: 1, seenCR: false };', + ' p++', ' }', - ' advance(peg$cachedPosDetails, peg$cachedPos, pos);', - ' peg$cachedPos = pos;', - ' }', '', - ' return {', - ' line: peg$cachedPosDetails.line,', - ' column: 
peg$cachedPosDetails.column,', - ' seenCR: peg$cachedPosDetails.seenCR', - ' };', + ' peg$posDetailsCache[pos] = details;', + ' return details', + ' }', ' }', '', ' function peg$computeLocation(startPos, endPos) {', diff --git a/lib/parser.js b/lib/parser.js index 290c013..22cf052 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -375,8 +375,7 @@ module.exports = (function() { peg$currPos = 0, peg$savedPos = 0, - peg$cachedPos = 0, - peg$cachedPosDetails = { line: 1, column: 1, seenCR: false }, + peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }], peg$maxFailPos = 0, peg$maxFailExpected = [], peg$silentFails = 0, @@ -418,10 +417,25 @@ module.exports = (function() { } function peg$computePosDetails(pos) { - function advance(details, startPos, endPos) { - var p, ch; + var details = peg$posDetailsCache[pos], + p, ch; - for (p = startPos; p < endPos; p++) { + if (details) { + return details; + } else { + p = pos - 1; + while (!peg$posDetailsCache[p]) { + p--; + } + + details = peg$posDetailsCache[p]; + details = { + line: details.line, + column: details.column, + seenCR: details.seenCR + }; + + while (p < pos) { ch = input.charAt(p); if (ch === "\n") { if (!details.seenCR) { details.line++; } @@ -435,23 +449,13 @@ module.exports = (function() { details.column++; details.seenCR = false; } - } - } - if (peg$cachedPos !== pos) { - if (peg$cachedPos > pos) { - peg$cachedPos = 0; - peg$cachedPosDetails = { line: 1, column: 1, seenCR: false }; + p++ } - advance(peg$cachedPosDetails, peg$cachedPos, pos); - peg$cachedPos = pos; - } - return { - line: peg$cachedPosDetails.line, - column: peg$cachedPosDetails.column, - seenCR: peg$cachedPosDetails.seenCR - }; + peg$posDetailsCache[pos] = details; + return details + } } function peg$computeLocation(startPos, endPos) {