From 22cb12347963397eceabe2eab1d7b6218d88d4db Mon Sep 17 00:00:00 2001 From: David Majda Date: Mon, 13 Jun 2016 13:39:13 +0200 Subject: [PATCH] Expectation refactoring 3/7: Change expectation processing Before this commit, expectations were sorted and de-duplicated before they were passed to "buildMessage" and exposed in the "expected" property of syntax errors. This commit moves this processing into "buildMessage" and rewrites it to process only expectation descriptions. This means expectations exposed in the "expected" property are "raw" (not sorted and de-duplicated). This change will allow us to get rid of the "description" property of expectations and compute descriptions dynamically from structured information in the expectations. This will make descriptions a presentation-only concept. It will also make generated parsers smaller. Note that to keep expectations in the "expected" property sorted even without the "description" property, some sorting scheme based on structured information in the expectations would have to be devised, which would complicate things with only a little benefit. Therefore I chose to keep the expectations there "raw". --- lib/compiler/passes/generate-js.js | 51 ++++++------------- .../generated-parser-behavior.spec.js | 44 +++++++--------- 2 files changed, 32 insertions(+), 63 deletions(-) diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index 7c3ba23..90aec45 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -1045,35 +1045,6 @@ function generateJS(ast, options) { ' }', '', ' function peg$buildException(message, expected, found, location) {', - ' function cleanupExpected(expected) {', - ' var i, j;', - '', - ' expected.sort(function(a, b) {', - ' if (a.description < b.description) {', - ' return -1;', - ' } else if (a.description > b.description) {', - ' return 1;', - ' } else {', - ' return 0;', - ' }', - ' });', - '', - /* - * This works because the bytecode generator guarantees that every - * expectation object exists only once, so it's enough to use |===| instead - * of deeper structural comparison. - */ - ' if (expected.length > 0) {', - ' for (i = 1, j = 1; i < expected.length; i++) {', - ' if (expected[i - 1] !== expected[i]) {', - ' expected[j] = expected[i];', - ' j++;', - ' }', - ' }', - ' expected.length = j;', - ' }', - ' }', - '', ' function buildMessage(expected, found) {', ' function escape(s) {', ' function hex(ch) { return ch.charCodeAt(0).toString(16).toUpperCase(); }', @@ -1090,16 +1061,28 @@ function generateJS(ast, options) { ' }', '', ' var expectedDescs = new Array(expected.length),', - ' expectedDesc, foundDesc, i;', + ' expectedDesc, foundDesc, i, j;', '', ' for (i = 0; i < expected.length; i++) {', ' expectedDescs[i] = expected[i].description;', ' }', '', - ' expectedDesc = expected.length > 1', + ' expectedDescs.sort();', + '', + ' if (expectedDescs.length > 0) {', + ' for (i = 1, j = 1; i < expectedDescs.length; i++) {', + ' if (expectedDescs[i - 1] !== expectedDescs[i]) {', + ' expectedDescs[j] = expectedDescs[i];', + ' j++;', + ' }', + ' }', + ' expectedDescs.length = j;', + ' }', + '', + ' expectedDesc = expectedDescs.length > 1', ' ? expectedDescs.slice(0, -1).join(", ")', ' + " or "', - ' + expectedDescs[expected.length - 1]', + ' + expectedDescs[expectedDescs.length - 1]', ' : expectedDescs[0];', '', ' foundDesc = found ? "\\"" + escape(found) + "\\"" : "end of input";', @@ -1107,10 +1090,6 @@ function generateJS(ast, options) { ' return "Expected " + expectedDesc + " but " + foundDesc + " found.";', ' }', '', - ' if (expected !== null) {', - ' cleanupExpected(expected);', - ' }', - '', ' return new peg$SyntaxError(', ' message !== null ? message : buildMessage(expected, found),', ' expected,', diff --git a/spec/behavior/generated-parser-behavior.spec.js b/spec/behavior/generated-parser-behavior.spec.js index 24975a2..fdec845 100644 --- a/spec/behavior/generated-parser-behavior.spec.js +++ b/spec/behavior/generated-parser-behavior.spec.js @@ -1364,33 +1364,6 @@ describe("generated parser behavior", function() { ] }); }); - - it("removes duplicates from expectations", function() { - /* - * There was a bug in the code that manifested only with three - * duplicates. This is why the following test uses three choices - * instead of seemingly sufficient two. - * - * See https://github.com/pegjs/pegjs/pull/146. - */ - var parser = peg.generate('start = "a" / "a" / "a"', options); - - expect(parser).toFailToParse("b", { - expected: [{ type: "literal", text: "a", ignoreCase: false, description: '"a"' }] - }); - }); - - it("sorts expectations", function() { - var parser = peg.generate('start = "c" / "b" / "a"', options); - - expect(parser).toFailToParse("d", { - expected: [ - { type: "literal", text: "a", ignoreCase: false, description: '"a"' }, - { type: "literal", text: "b", ignoreCase: false, description: '"b"' }, - { type: "literal", text: "c", ignoreCase: false, description: '"c"' } - ] - }); - }); }); describe("found string reporting", function() { @@ -1447,6 +1420,23 @@ describe("generated parser behavior", function() { message: 'Expected "a" but "b" found.' }); }); + + it("removes duplicates from expectations", function() { + var parser = peg.generate('start = "a" / "a"', options); + + expect(parser).toFailToParse("b", { + message: 'Expected "a" but "b" found.' + }); + }); + + it("sorts expectations", function() { + var parser = peg.generate('start = "c" / "b" / "a"', options); + + expect(parser).toFailToParse("d", { + message: 'Expected "a", "b" or "c" but "d" found.' + }); + }); + }); describe("position reporting", function() {