From 6b60896216fc6f00ab111e6b2e0c2b1a6c9fdba2 Mon Sep 17 00:00:00 2001 From: David Majda Date: Fri, 10 Jun 2016 15:18:25 +0200 Subject: [PATCH] Revert "Remove info about found string from syntax errors" This reverts commit 25ab98027d5181c30871394066b1fedc29cc90d3. Part of work on #428. --- README.md | 4 +- lib/compiler/passes/generate-js.js | 56 +++++++++++++--- lib/parser.js | 46 ++++++++++--- .../generated-parser-behavior.spec.js | 67 ++++++++++++++++--- 4 files changed, 144 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 2cb3157..5df4462 100644 --- a/README.md +++ b/README.md @@ -155,8 +155,8 @@ Using the Parser Using the generated parser is simple — just call its `parse` method and pass an input string as a parameter. The method will return a parse result (the exact value depends on the grammar used to generate the parser) or throw an exception -if the input is invalid. The exception will contain `location`, `expected` and -`message` properties with more details about the error. +if the input is invalid. The exception will contain `location`, `expected`, +`found`, and `message` properties with more details about the error. ```javascript parser.parse("abba"); // returns ["a", "b", "b", "a"] diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index 5dfbb31..797f524 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -768,9 +768,10 @@ function generateJS(ast, options) { ' child.prototype = new ctor();', '}', '', - 'function peg$SyntaxError(message, expected, location) {', + 'function peg$SyntaxError(message, expected, found, location) {', ' this.message = message;', ' this.expected = expected;', + ' this.found = found;', ' this.location = location;', ' this.name = "SyntaxError";', '', @@ -965,6 +966,7 @@ function generateJS(ast, options) { ' throw peg$buildException(', ' null,', ' [{ type: "other", description: description }],', + ' input.substring(peg$savedPos, peg$currPos),', ' location', ' );', ' }', @@ -972,7 +974,12 @@ function generateJS(ast, options) { ' function error(message, location) {', ' location = location !== void 0 ? location : peg$computeLocation(peg$savedPos, peg$currPos)', '', - ' throw peg$buildException(message, null, location);', + ' throw peg$buildException(', + ' message,', + ' null,', + ' input.substring(peg$savedPos, peg$currPos),', + ' location', + ' );', ' }', '', ' function peg$computePosDetails(pos) {', @@ -1037,7 +1044,7 @@ function generateJS(ast, options) { ' peg$maxFailExpected.push(expected);', ' }', '', - ' function peg$buildException(message, expected, location) {', + ' function peg$buildException(message, expected, found, location) {', ' function cleanupExpected(expected) {', ' var i, j;', '', @@ -1067,9 +1074,36 @@ function generateJS(ast, options) { ' }', ' }', '', - ' function buildMessage(expected) {', + ' function buildMessage(expected, found) {', + ' function stringEscape(s) {', + ' function hex(ch) { return ch.charCodeAt(0).toString(16).toUpperCase(); }', + '', + /* + * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a + * string literal except for the closing quote character, backslash, + * carriage return, line separator, paragraph separator, and line feed. + * Any character may appear in the form of an escape sequence. + * + * For portability, we also escape all control and non-ASCII characters. + * Note that "\0" and "\v" escape sequences are not used because JSHint + * does not like the first and IE the second. + */ + ' return s', + ' .replace(/\\\\/g, \'\\\\\\\\\')', // backslash + ' .replace(/"/g, \'\\\\"\')', // closing double quote + ' .replace(/\\x08/g, \'\\\\b\')', // backspace + ' .replace(/\\t/g, \'\\\\t\')', // horizontal tab + ' .replace(/\\n/g, \'\\\\n\')', // line feed + ' .replace(/\\f/g, \'\\\\f\')', // form feed + ' .replace(/\\r/g, \'\\\\r\')', // carriage return + ' .replace(/[\\x00-\\x07\\x0B\\x0E\\x0F]/g, function(ch) { return \'\\\\x0\' + hex(ch); })', + ' .replace(/[\\x10-\\x1F\\x80-\\xFF]/g, function(ch) { return \'\\\\x\' + hex(ch); })', + ' .replace(/[\\u0100-\\u0FFF]/g, function(ch) { return \'\\\\u0\' + hex(ch); })', + ' .replace(/[\\u1000-\\uFFFF]/g, function(ch) { return \'\\\\u\' + hex(ch); });', + ' }', + '', ' var expectedDescs = new Array(expected.length),', - ' expectedDesc, i;', + ' expectedDesc, foundDesc, i;', '', ' for (i = 0; i < expected.length; i++) {', ' expectedDescs[i] = expected[i].description;', @@ -1081,7 +1115,9 @@ function generateJS(ast, options) { ' + expectedDescs[expected.length - 1]', ' : expectedDescs[0];', '', - ' return "Expected " + expectedDesc + ".";', + ' foundDesc = found ? "\\"" + stringEscape(found) + "\\"" : "end of input";', + '', + ' return "Expected " + expectedDesc + " but " + foundDesc + " found.";', ' }', '', ' if (expected !== null) {', @@ -1089,8 +1125,9 @@ function generateJS(ast, options) { ' }', '', ' return new peg$SyntaxError(', - ' message !== null ? message : buildMessage(expected),', + ' message !== null ? message : buildMessage(expected, found),', ' expected,', + ' found,', ' location', ' );', ' }', @@ -1130,7 +1167,10 @@ function generateJS(ast, options) { ' throw peg$buildException(', ' null,', ' peg$maxFailExpected,', - ' peg$computeLocation(peg$maxFailPos, peg$maxFailPos)', + ' peg$maxFailPos < input.length ? input.charAt(peg$maxFailPos) : null,', + ' peg$maxFailPos < input.length', + ' ? peg$computeLocation(peg$maxFailPos, peg$maxFailPos + 1)', + ' : peg$computeLocation(peg$maxFailPos, peg$maxFailPos)', ' );', ' }', '}' diff --git a/lib/parser.js b/lib/parser.js index 34d114f..8bf8156 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -21,9 +21,10 @@ child.prototype = new ctor(); } - function peg$SyntaxError(message, expected, location) { + function peg$SyntaxError(message, expected, found, location) { this.message = message; this.expected = expected; + this.found = found; this.location = location; this.name = "SyntaxError"; @@ -427,6 +428,7 @@ throw peg$buildException( null, [{ type: "other", description: description }], + input.substring(peg$savedPos, peg$currPos), location ); } @@ -434,7 +436,12 @@ function error(message, location) { location = location !== void 0 ? location : peg$computeLocation(peg$savedPos, peg$currPos) - throw peg$buildException(message, null, location); + throw peg$buildException( + message, + null, + input.substring(peg$savedPos, peg$currPos), + location + ); } function peg$computePosDetails(pos) { @@ -499,7 +506,7 @@ peg$maxFailExpected.push(expected); } - function peg$buildException(message, expected, location) { + function peg$buildException(message, expected, found, location) { function cleanupExpected(expected) { var i, j; @@ -524,9 +531,26 @@ } } - function buildMessage(expected) { + function buildMessage(expected, found) { + function stringEscape(s) { + function hex(ch) { return ch.charCodeAt(0).toString(16).toUpperCase(); } + + return s + .replace(/\\/g, '\\\\') + .replace(/"/g, '\\"') + .replace(/\x08/g, '\\b') + .replace(/\t/g, '\\t') + .replace(/\n/g, '\\n') + .replace(/\f/g, '\\f') + .replace(/\r/g, '\\r') + .replace(/[\x00-\x07\x0B\x0E\x0F]/g, function(ch) { return '\\x0' + hex(ch); }) + .replace(/[\x10-\x1F\x80-\xFF]/g, function(ch) { return '\\x' + hex(ch); }) + .replace(/[\u0100-\u0FFF]/g, function(ch) { return '\\u0' + hex(ch); }) + .replace(/[\u1000-\uFFFF]/g, function(ch) { return '\\u' + hex(ch); }); + } + var expectedDescs = new Array(expected.length), - expectedDesc, i; + expectedDesc, foundDesc, i; for (i = 0; i < expected.length; i++) { expectedDescs[i] = expected[i].description; @@ -538,7 +562,9 @@ + expectedDescs[expected.length - 1] : expectedDescs[0]; - return "Expected " + expectedDesc + "."; + foundDesc = found ? "\"" + stringEscape(found) + "\"" : "end of input"; + + return "Expected " + expectedDesc + " but " + foundDesc + " found."; } if (expected !== null) { @@ -546,8 +572,9 @@ } return new peg$SyntaxError( - message !== null ? message : buildMessage(expected), + message !== null ? message : buildMessage(expected, found), expected, + found, location ); } @@ -4948,7 +4975,10 @@ throw peg$buildException( null, peg$maxFailExpected, - peg$computeLocation(peg$maxFailPos, peg$maxFailPos) + peg$maxFailPos < input.length ? input.charAt(peg$maxFailPos) : null, + peg$maxFailPos < input.length + ? peg$computeLocation(peg$maxFailPos, peg$maxFailPos + 1) + : peg$computeLocation(peg$maxFailPos, peg$maxFailPos) ); } } diff --git a/spec/behavior/generated-parser-behavior.spec.js b/spec/behavior/generated-parser-behavior.spec.js index c2c94c5..f129494 100644 --- a/spec/behavior/generated-parser-behavior.spec.js +++ b/spec/behavior/generated-parser-behavior.spec.js @@ -1219,8 +1219,9 @@ describe("generated parser behavior", function() { ); expect(parser).toFailToParse("a", { - message: 'Expected a.', + message: 'Expected a but "a" found.', expected: [{ type: "other", description: "a" }], + found: "a", location: { start: { offset: 0, line: 1, column: 1 }, end: { offset: 1, line: 1, column: 2 } @@ -1239,8 +1240,9 @@ describe("generated parser behavior", function() { ].join("\n"), options); expect(parser).toFailToParse("a", { - message: 'Expected a.', + message: 'Expected a but "a" found.', expected: [{ type: "other", description: "a" }], + found: "a", location: { start: { offset: 1, line: 1, column: 2 }, end: { offset: 2, line: 1, column: 3 } @@ -1258,6 +1260,7 @@ describe("generated parser behavior", function() { expect(parser).toFailToParse("a", { message: "a", + found: "a", expected: null, location: { start: { offset: 0, line: 1, column: 1 }, @@ -1279,6 +1282,7 @@ describe("generated parser behavior", function() { expect(parser).toFailToParse("a", { message: "a", expected: null, + found: "a", location: { start: { offset: 1, line: 1, column: 2 }, end: { offset: 2, line: 1, column: 3 } @@ -1395,12 +1399,26 @@ describe("generated parser behavior", function() { }); }); + describe("found string reporting", function() { + it("reports found string correctly at the end of input", function() { + var parser = peg.generate('start = "a"', options); + + expect(parser).toFailToParse("", { found: null }); + }); + + it("reports found string correctly in the middle of input", function() { + var parser = peg.generate('start = "a"', options); + + expect(parser).toFailToParse("b", { found: "b" }); + }); + }); + describe("message building", function() { it("builds message correctly with no alternative", function() { var parser = peg.generate('start = "a"', options); expect(parser).toFailToParse("ab", { - message: 'Expected end of input.' + message: 'Expected end of input but "b" found.' }); }); @@ -1408,7 +1426,7 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = "a"', options); expect(parser).toFailToParse("b", { - message: 'Expected "a".' + message: 'Expected "a" but "b" found.' }); }); @@ -1416,16 +1434,32 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = "a" / "b" / "c"', options); expect(parser).toFailToParse("d", { - message: 'Expected "a", "b" or "c".' + message: 'Expected "a", "b" or "c" but "d" found.' + }); + }); + + it("builds message correctly at the end of input", function() { + var parser = peg.generate('start = "a"', options); + + expect(parser).toFailToParse("", { + message: 'Expected "a" but end of input found.' + }); + }); + + it("builds message correctly in the middle of input", function() { + var parser = peg.generate('start = "a"', options); + + expect(parser).toFailToParse("b", { + message: 'Expected "a" but "b" found.' }); }); }); describe("position reporting", function() { - it("reports position correctly with no trailing input", function() { + it("reports position correctly at the end of input", function() { var parser = peg.generate('start = "a"', options); - expect(parser).toFailToParse("b", { + expect(parser).toFailToParse("", { location: { start: { offset: 0, line: 1, column: 1 }, end: { offset: 0, line: 1, column: 1 } @@ -1433,13 +1467,24 @@ describe("generated parser behavior", function() { }); }); + it("reports position correctly in the middle of input", function() { + var parser = peg.generate('start = "a"', options); + + expect(parser).toFailToParse("b", { + location: { + start: { offset: 0, line: 1, column: 1 }, + end: { offset: 1, line: 1, column: 2 } + } + }); + }); + it("reports position correctly with trailing input", function() { var parser = peg.generate('start = "a"', options); expect(parser).toFailToParse("aa", { location: { start: { offset: 1, line: 1, column: 2 }, - end: { offset: 1, line: 1, column: 2 } + end: { offset: 2, line: 1, column: 3 } } }); }); @@ -1455,7 +1500,7 @@ describe("generated parser behavior", function() { expect(parser).toFailToParse("1\n2\n\n3\n\n\n4 5 x", { location: { start: { offset: 13, line: 7, column: 5 }, - end: { offset: 13, line: 7, column: 5 } + end: { offset: 14, line: 7, column: 6 } } }); @@ -1463,13 +1508,13 @@ describe("generated parser behavior", function() { expect(parser).toFailToParse("1\nx", { // Old Mac location: { start: { offset: 2, line: 2, column: 1 }, - end: { offset: 2, line: 2, column: 1 } + end: { offset: 3, line: 2, column: 2 } } }); expect(parser).toFailToParse("1\r\nx", { // Windows location: { start: { offset: 3, line: 2, column: 1 }, - end: { offset: 3, line: 2, column: 1 } + end: { offset: 4, line: 2, column: 2 } } }); });