From 18d266be67b989a4ca6650f00e6e29beadf2fd76 Mon Sep 17 00:00:00 2001 From: David Majda Date: Fri, 5 Feb 2016 17:00:52 +0100 Subject: [PATCH] Remove support for newlines other than "\n" and "\r\n" Before this commit, generated parsers considered the following character sequences as newlines: Sequence Description ------------------------------ "\n" Unix "\r" Old Mac "\r\n" Windows "\u2028" line separator "\u2029" paragraph separator This commit limits the sequences only to "\n" and "\r\n". The reason is that nobody uses Unicode newlines or "\r" in practice. A positive side effect of the change is that newline-handling code became simpler (and likely faster). --- lib/compiler/passes/generate-js.js | 17 +--- lib/parser.js | 17 +--- .../generated-parser-behavior.spec.js | 86 +++---------------- 3 files changed, 20 insertions(+), 100 deletions(-) diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index d30e373..620dda1 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -895,7 +895,7 @@ function generateJS(ast, options) { '', ' peg$currPos = 0,', ' peg$savedPos = 0,', - ' peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }],', + ' peg$posDetailsCache = [{ line: 1, column: 1 }],', ' peg$maxFailPos = 0,', ' peg$maxFailExpected = [],', ' peg$silentFails = 0,', // 0 = report failures, > 0 = silence failures @@ -984,8 +984,7 @@ function generateJS(ast, options) { ' }', '', ' function peg$computePosDetails(pos) {', - ' var details = peg$posDetailsCache[pos],', - ' p, ch;', + ' var details = peg$posDetailsCache[pos], p;', '', ' if (details) {', ' return details;', @@ -998,23 +997,15 @@ function generateJS(ast, options) { ' details = peg$posDetailsCache[p];', ' details = {', ' line: details.line,', - ' column: details.column,', - ' seenCR: details.seenCR', + ' column: details.column', ' };', '', ' while (p < pos) {', - ' ch = input.charAt(p);', - ' if (ch === "\\n") {', - ' if (!details.seenCR) { details.line++; }', - ' details.column = 1;', - ' details.seenCR = false;', - ' } else if (ch === "\\r" || ch === "\\u2028" || ch === "\\u2029") {', + ' if (input.charAt(p) === "\\n") {', ' details.line++;', ' details.column = 1;', - ' details.seenCR = true;', ' } else {', ' details.column++;', - ' details.seenCR = false;', ' }', '', ' p++;', diff --git a/lib/parser.js b/lib/parser.js index 96237c2..eb0c8e1 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -382,7 +382,7 @@ module.exports = (function() { peg$currPos = 0, peg$savedPos = 0, - peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }], + peg$posDetailsCache = [{ line: 1, column: 1 }], peg$maxFailPos = 0, peg$maxFailExpected = [], peg$silentFails = 0, @@ -422,8 +422,7 @@ module.exports = (function() { } function peg$computePosDetails(pos) { - var details = peg$posDetailsCache[pos], - p, ch; + var details = peg$posDetailsCache[pos], p; if (details) { return details; @@ -436,23 +435,15 @@ module.exports = (function() { details = peg$posDetailsCache[p]; details = { line: details.line, - column: details.column, - seenCR: details.seenCR + column: details.column }; while (p < pos) { - ch = input.charAt(p); - if (ch === "\n") { - if (!details.seenCR) { details.line++; } - details.column = 1; - details.seenCR = false; - } else if (ch === "\r" || ch === "\u2028" || ch === "\u2029") { + if (input.charAt(p) === "\n") { details.line++; details.column = 1; - details.seenCR = true; } else { details.column++; - details.seenCR = false; } p++; diff --git a/spec/behavior/generated-parser-behavior.spec.js b/spec/behavior/generated-parser-behavior.spec.js index 42b32dc..e23790f 100644 --- a/spec/behavior/generated-parser-behavior.spec.js +++ b/spec/behavior/generated-parser-behavior.spec.js @@ -596,7 +596,7 @@ describe("generated parser behavior", function() { 'thing = digit / mark', 'digit = [0-9]', 'mark = &{ result = location(); return true; } "x"', - 'nl = [\\r"\\n\\u2028\\u2029]' + 'nl = "\\r"? "\\n"' ].join("\n"), options); expect(parser).toParse("1\n2\n\n3\n\n\n4 5 x", { @@ -604,8 +604,8 @@ describe("generated parser behavior", function() { end: { offset: 13, line: 7, column: 5 } }); - /* Non-Unix newlines */ - expect(parser).toParse("1\rx", { // Old Mac + /* Newline representations */ + expect(parser).toParse("1\nx", { // Unix start: { offset: 2, line: 2, column: 1 }, end: { offset: 2, line: 2, column: 1 } }); @@ -613,20 +613,6 @@ describe("generated parser behavior", function() { start: { offset: 3, line: 2, column: 1 }, end: { offset: 3, line: 2, column: 1 } }); - expect(parser).toParse("1\n\rx", { // mismatched - start: { offset: 3, line: 3, column: 1 }, - end: { offset: 3, line: 3, column: 1 } - }); - - /* Strange newlines */ - expect(parser).toParse("1\u2028x", { // line separator - start: { offset: 2, line: 2, column: 1 }, - end: { offset: 2, line: 2, column: 1 } - }); - expect(parser).toParse("1\u2029x", { // paragraph separator - start: { offset: 2, line: 2, column: 1 }, - end: { offset: 2, line: 2, column: 1 } - }); }); }); }); @@ -809,7 +795,7 @@ describe("generated parser behavior", function() { 'thing = digit / mark', 'digit = [0-9]', 'mark = !{ result = location(); return false; } "x"', - 'nl = [\\r"\\n\\u2028\\u2029]' + 'nl = "\\r"? "\\n"' ].join("\n"), options); expect(parser).toParse("1\n2\n\n3\n\n\n4 5 x", { @@ -817,8 +803,8 @@ describe("generated parser behavior", function() { end: { offset: 13, line: 7, column: 5 } }); - /* Non-Unix newlines */ - expect(parser).toParse("1\rx", { // Old Mac + /* Newline representations */ + expect(parser).toParse("1\nx", { // Unix start: { offset: 2, line: 2, column: 1 }, end: { offset: 2, line: 2, column: 1 } }); @@ -826,20 +812,6 @@ describe("generated parser behavior", function() { start: { offset: 3, line: 2, column: 1 }, end: { offset: 3, line: 2, column: 1 } }); - expect(parser).toParse("1\n\rx", { // mismatched - start: { offset: 3, line: 3, column: 1 }, - end: { offset: 3, line: 3, column: 1 } - }); - - /* Strange newlines */ - expect(parser).toParse("1\u2028x", { // line separator - start: { offset: 2, line: 2, column: 1 }, - end: { offset: 2, line: 2, column: 1 } - }); - expect(parser).toParse("1\u2029x", { // paragraph separator - start: { offset: 2, line: 2, column: 1 }, - end: { offset: 2, line: 2, column: 1 } - }); }); }); }); @@ -1227,7 +1199,7 @@ describe("generated parser behavior", function() { 'thing = digit / mark', 'digit = [0-9]', 'mark = "x" { result = location(); }', - 'nl = [\\r\\n\\u2028\\u2029]' + 'nl = "\\r"? "\\n"' ].join("\n"), options); expect(parser).toParse("1\n2\n\n3\n\n\n4 5 x", { @@ -1235,8 +1207,8 @@ describe("generated parser behavior", function() { end: { offset: 14, line: 7, column: 6 } }); - /* Non-Unix newlines */ - expect(parser).toParse("1\rx", { // Old Mac + /* Newline representations */ + expect(parser).toParse("1\nx", { // Unix start: { offset: 2, line: 2, column: 1 }, end: { offset: 3, line: 2, column: 2 } }); @@ -1244,20 +1216,6 @@ describe("generated parser behavior", function() { start: { offset: 3, line: 2, column: 1 }, end: { offset: 4, line: 2, column: 2 } }); - expect(parser).toParse("1\n\rx", { // mismatched - start: { offset: 3, line: 3, column: 1 }, - end: { offset: 4, line: 3, column: 2 } - }); - - /* Strange newlines */ - expect(parser).toParse("1\u2028x", { // line separator - start: { offset: 2, line: 2, column: 1 }, - end: { offset: 3, line: 2, column: 2 } - }); - expect(parser).toParse("1\u2029x", { // paragraph separator - start: { offset: 2, line: 2, column: 1 }, - end: { offset: 3, line: 2, column: 2 } - }); }); it("|expected| terminates parsing and throws an exception", function() { @@ -1454,7 +1412,7 @@ describe("generated parser behavior", function() { 'start = line (nl+ line)*', 'line = digit (" "+ digit)*', 'digit = [0-9]', - 'nl = [\\r\\n\\u2028\\u2029]' + 'nl = "\\r"? "\\n"' ].join("\n"), options); expect(parser).toFailToParse("1\n2\n\n3\n\n\n4 5 x", { @@ -1464,8 +1422,8 @@ describe("generated parser behavior", function() { } }); - /* Non-Unix newlines */ - expect(parser).toFailToParse("1\rx", { // Old Mac + /* Newline representations */ + expect(parser).toFailToParse("1\nx", { // Old Mac location: { start: { offset: 2, line: 2, column: 1 }, end: { offset: 2, line: 2, column: 1 } @@ -1477,26 +1435,6 @@ describe("generated parser behavior", function() { end: { offset: 3, line: 2, column: 1 } } }); - expect(parser).toFailToParse("1\n\rx", { // mismatched - location: { - start: { offset: 3, line: 3, column: 1 }, - end: { offset: 3, line: 3, column: 1 } - } - }); - - /* Strange newlines */ - expect(parser).toFailToParse("1\u2028x", { // line separator - location: { - start: { offset: 2, line: 2, column: 1 }, - end: { offset: 2, line: 2, column: 1 } - } - }); - expect(parser).toFailToParse("1\u2029x", { // paragraph separator - location: { - start: { offset: 2, line: 2, column: 1 }, - end: { offset: 2, line: 2, column: 1 } - } - }); }); }); });