Remove support for newlines other than "\n" and "\r\n"

Before this commit, generated parsers considered the following character
sequences as newlines:

  Sequence   Description
  ------------------------------
  "\n"       Unix
  "\r"       Old Mac
  "\r\n"     Windows
  "\u2028"   line separator
  "\u2029"   paragraph separator

This commit limits the recognized sequences to "\n" and "\r\n". The reason
is that practically nobody uses Unicode newlines or a lone "\r".

A positive side effect of the change is that the newline-handling code
became simpler (and likely faster).
redux
David Majda 8 years ago
parent e61c23c634
commit 18d266be67

@ -895,7 +895,7 @@ function generateJS(ast, options) {
'',
' peg$currPos = 0,',
' peg$savedPos = 0,',
' peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }],',
' peg$posDetailsCache = [{ line: 1, column: 1 }],',
' peg$maxFailPos = 0,',
' peg$maxFailExpected = [],',
' peg$silentFails = 0,', // 0 = report failures, > 0 = silence failures
@ -984,8 +984,7 @@ function generateJS(ast, options) {
' }',
'',
' function peg$computePosDetails(pos) {',
' var details = peg$posDetailsCache[pos],',
' p, ch;',
' var details = peg$posDetailsCache[pos], p;',
'',
' if (details) {',
' return details;',
@ -998,23 +997,15 @@ function generateJS(ast, options) {
' details = peg$posDetailsCache[p];',
' details = {',
' line: details.line,',
' column: details.column,',
' seenCR: details.seenCR',
' column: details.column',
' };',
'',
' while (p < pos) {',
' ch = input.charAt(p);',
' if (ch === "\\n") {',
' if (!details.seenCR) { details.line++; }',
' details.column = 1;',
' details.seenCR = false;',
' } else if (ch === "\\r" || ch === "\\u2028" || ch === "\\u2029") {',
' if (input.charAt(p) === "\\n") {',
' details.line++;',
' details.column = 1;',
' details.seenCR = true;',
' } else {',
' details.column++;',
' details.seenCR = false;',
' }',
'',
' p++;',

@ -382,7 +382,7 @@ module.exports = (function() {
peg$currPos = 0,
peg$savedPos = 0,
peg$posDetailsCache = [{ line: 1, column: 1, seenCR: false }],
peg$posDetailsCache = [{ line: 1, column: 1 }],
peg$maxFailPos = 0,
peg$maxFailExpected = [],
peg$silentFails = 0,
@ -422,8 +422,7 @@ module.exports = (function() {
}
function peg$computePosDetails(pos) {
var details = peg$posDetailsCache[pos],
p, ch;
var details = peg$posDetailsCache[pos], p;
if (details) {
return details;
@ -436,23 +435,15 @@ module.exports = (function() {
details = peg$posDetailsCache[p];
details = {
line: details.line,
column: details.column,
seenCR: details.seenCR
column: details.column
};
while (p < pos) {
ch = input.charAt(p);
if (ch === "\n") {
if (!details.seenCR) { details.line++; }
details.column = 1;
details.seenCR = false;
} else if (ch === "\r" || ch === "\u2028" || ch === "\u2029") {
if (input.charAt(p) === "\n") {
details.line++;
details.column = 1;
details.seenCR = true;
} else {
details.column++;
details.seenCR = false;
}
p++;

@ -596,7 +596,7 @@ describe("generated parser behavior", function() {
'thing = digit / mark',
'digit = [0-9]',
'mark = &{ result = location(); return true; } "x"',
'nl = [\\r"\\n\\u2028\\u2029]'
'nl = "\\r"? "\\n"'
].join("\n"), options);
expect(parser).toParse("1\n2\n\n3\n\n\n4 5 x", {
@ -604,8 +604,8 @@ describe("generated parser behavior", function() {
end: { offset: 13, line: 7, column: 5 }
});
/* Non-Unix newlines */
expect(parser).toParse("1\rx", { // Old Mac
/* Newline representations */
expect(parser).toParse("1\nx", { // Unix
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
});
@ -613,20 +613,6 @@ describe("generated parser behavior", function() {
start: { offset: 3, line: 2, column: 1 },
end: { offset: 3, line: 2, column: 1 }
});
expect(parser).toParse("1\n\rx", { // mismatched
start: { offset: 3, line: 3, column: 1 },
end: { offset: 3, line: 3, column: 1 }
});
/* Strange newlines */
expect(parser).toParse("1\u2028x", { // line separator
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
});
expect(parser).toParse("1\u2029x", { // paragraph separator
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
});
});
});
});
@ -809,7 +795,7 @@ describe("generated parser behavior", function() {
'thing = digit / mark',
'digit = [0-9]',
'mark = !{ result = location(); return false; } "x"',
'nl = [\\r"\\n\\u2028\\u2029]'
'nl = "\\r"? "\\n"'
].join("\n"), options);
expect(parser).toParse("1\n2\n\n3\n\n\n4 5 x", {
@ -817,8 +803,8 @@ describe("generated parser behavior", function() {
end: { offset: 13, line: 7, column: 5 }
});
/* Non-Unix newlines */
expect(parser).toParse("1\rx", { // Old Mac
/* Newline representations */
expect(parser).toParse("1\nx", { // Unix
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
});
@ -826,20 +812,6 @@ describe("generated parser behavior", function() {
start: { offset: 3, line: 2, column: 1 },
end: { offset: 3, line: 2, column: 1 }
});
expect(parser).toParse("1\n\rx", { // mismatched
start: { offset: 3, line: 3, column: 1 },
end: { offset: 3, line: 3, column: 1 }
});
/* Strange newlines */
expect(parser).toParse("1\u2028x", { // line separator
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
});
expect(parser).toParse("1\u2029x", { // paragraph separator
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
});
});
});
});
@ -1227,7 +1199,7 @@ describe("generated parser behavior", function() {
'thing = digit / mark',
'digit = [0-9]',
'mark = "x" { result = location(); }',
'nl = [\\r\\n\\u2028\\u2029]'
'nl = "\\r"? "\\n"'
].join("\n"), options);
expect(parser).toParse("1\n2\n\n3\n\n\n4 5 x", {
@ -1235,8 +1207,8 @@ describe("generated parser behavior", function() {
end: { offset: 14, line: 7, column: 6 }
});
/* Non-Unix newlines */
expect(parser).toParse("1\rx", { // Old Mac
/* Newline representations */
expect(parser).toParse("1\nx", { // Unix
start: { offset: 2, line: 2, column: 1 },
end: { offset: 3, line: 2, column: 2 }
});
@ -1244,20 +1216,6 @@ describe("generated parser behavior", function() {
start: { offset: 3, line: 2, column: 1 },
end: { offset: 4, line: 2, column: 2 }
});
expect(parser).toParse("1\n\rx", { // mismatched
start: { offset: 3, line: 3, column: 1 },
end: { offset: 4, line: 3, column: 2 }
});
/* Strange newlines */
expect(parser).toParse("1\u2028x", { // line separator
start: { offset: 2, line: 2, column: 1 },
end: { offset: 3, line: 2, column: 2 }
});
expect(parser).toParse("1\u2029x", { // paragraph separator
start: { offset: 2, line: 2, column: 1 },
end: { offset: 3, line: 2, column: 2 }
});
});
it("|expected| terminates parsing and throws an exception", function() {
@ -1454,7 +1412,7 @@ describe("generated parser behavior", function() {
'start = line (nl+ line)*',
'line = digit (" "+ digit)*',
'digit = [0-9]',
'nl = [\\r\\n\\u2028\\u2029]'
'nl = "\\r"? "\\n"'
].join("\n"), options);
expect(parser).toFailToParse("1\n2\n\n3\n\n\n4 5 x", {
@ -1464,8 +1422,8 @@ describe("generated parser behavior", function() {
}
});
/* Non-Unix newlines */
expect(parser).toFailToParse("1\rx", { // Old Mac
/* Newline representations */
expect(parser).toFailToParse("1\nx", { // Unix
location: {
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
@ -1477,26 +1435,6 @@ describe("generated parser behavior", function() {
end: { offset: 3, line: 2, column: 1 }
}
});
expect(parser).toFailToParse("1\n\rx", { // mismatched
location: {
start: { offset: 3, line: 3, column: 1 },
end: { offset: 3, line: 3, column: 1 }
}
});
/* Strange newlines */
expect(parser).toFailToParse("1\u2028x", { // line separator
location: {
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
}
});
expect(parser).toFailToParse("1\u2029x", { // paragraph separator
location: {
start: { offset: 2, line: 2, column: 1 },
end: { offset: 2, line: 2, column: 1 }
}
});
});
});
});

Loading…
Cancel
Save