From 319931876d632d1a64efdf795b3b8910990f161a Mon Sep 17 00:00:00 2001 From: David Majda Date: Fri, 17 Jun 2016 15:14:47 +0200 Subject: [PATCH] Expectation refactoring 4/7: Generate descriptions dynamically Instead of pre-generating expectation descriptions when generating parsers, generate them dynamically from structured information contained in the expectations. This change makes descriptions a presentation-only concept. It also makes generated parsers smaller. --- lib/compiler/passes/generate-bytecode.js | 53 +------------- lib/compiler/passes/generate-js.js | 73 ++++++++++++++++--- .../generated-parser-behavior.spec.js | 28 +++---- .../compiler/passes/generate-bytecode.spec.js | 64 ++++++++-------- 4 files changed, 112 insertions(+), 106 deletions(-) diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js index b009b92..4b5f2f6 100644 --- a/lib/compiler/passes/generate-bytecode.js +++ b/lib/compiler/passes/generate-bytecode.js @@ -280,36 +280,6 @@ function generateBytecode(ast) { ); } - function hex(ch) { - return ch.charCodeAt(0).toString(16).toUpperCase(); - } - - function literalDescriptionEscape(s) { - return s - .replace(/\\/g, '\\\\') // backslash - .replace(/"/g, '\\"') // closing double quote - .replace(/\0/g, '\\0') // null - .replace(/\t/g, '\\t') // horizontal tab - .replace(/\n/g, '\\n') // line feed - .replace(/\r/g, '\\r') // carriage return - .replace(/[\x00-\x0F]/g, function(ch) { return '\\x0' + hex(ch); }) - .replace(/[\x10-\x1F\x7F-\x9F]/g, function(ch) { return '\\x' + hex(ch); }); - } - - function classDescriptionEscape(s) { - return s - .replace(/\\/g, '\\\\') // backslash - .replace(/\]/g, '\\]') // closing bracket - .replace(/\^/g, '\\^') // caret - .replace(/-/g, '\\-') // dash - .replace(/\0/g, '\\0') // null - .replace(/\t/g, '\\t') // horizontal tab - .replace(/\n/g, '\\n') // line feed - .replace(/\r/g, '\\r') // carriage return - .replace(/[\x00-\x0F]/g, function(ch) { return '\\x0' + hex(ch); }) - .replace(/[\x10-\x1F\x7F-\x9F]/g, function(ch) { return '\\x' + hex(ch); }); - } - var generate = visitor.build({ grammar: function(node) { arrays.each(node.rules, generate); @@ -574,10 +544,7 @@ function generateBytecode(ast) { '{', 'type: "literal",', 'text: "' + js.stringEscape(node.value) + '",', - 'ignoreCase: ' + node.ignoreCase + ',', - 'description: "' - + js.stringEscape('"' + literalDescriptionEscape(node.value) + '"') - + '"', + 'ignoreCase: ' + node.ignoreCase, '}' ].join(' ')); @@ -603,7 +570,7 @@ function generateBytecode(ast) { }, "class": function(node) { - var regexp, parts, description, regexpIndex, expectedIndex; + var regexp, parts, regexpIndex, expectedIndex; if (node.parts.length > 0) { regexp = '/^[' @@ -632,25 +599,13 @@ function generateBytecode(ast) { }).join(', ') + ']'; - description = "[" - + (node.inverted ? "^" : "") - + arrays.map(node.parts, function(part) { - return part instanceof Array - ? classDescriptionEscape(part[0]) - + "-" - + classDescriptionEscape(part[1]) - : classDescriptionEscape(part); - }).join("") - + "]"; - regexpIndex = addConst(regexp); expectedIndex = addConst([ '{', 'type: "class",', 'parts: ' + parts + ',', 'inverted: ' + node.inverted + ',', - 'ignoreCase: ' + node.ignoreCase + ',', - 'description: "' + js.stringEscape(description) + '"', + 'ignoreCase: ' + node.ignoreCase, '}' ].join(' ')); @@ -662,7 +617,7 @@ function generateBytecode(ast) { }, any: function() { - var expectedIndex = addConst('{ type: "any", description: "any character" }'); + var expectedIndex = addConst('{ type: "any" }'); return buildCondition( [op.MATCH_ANY], diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index 90aec45..57900bf 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -1046,25 +1046,76 @@ function generateJS(ast, options) { '', ' function peg$buildException(message, expected, found, location) {', ' function buildMessage(expected, found) {', - ' function escape(s) {', - ' function hex(ch) { return ch.charCodeAt(0).toString(16).toUpperCase(); }', + ' var DESCRIBE_EXPECTATION_FNS = {', + ' literal: function(expectation) {', + ' return "\\\"" + literalEscape(expectation.text) + "\\\"";', + ' },', + '', + ' class: function(expectation) {', + ' var escapedParts = "",', + ' i;', + '', + ' for (i = 0; i < expectation.parts.length; i++) {', + ' escapedParts += expectation.parts[i] instanceof Array', + ' ? classEscape(expectation.parts[i][0]) + "-" + classEscape(expectation.parts[i][1])', + ' : classEscape(expectation.parts[i]);', + ' }', + '', + ' return "[" + (expectation.inverted ? "^" : "") + escapedParts + "]";', + ' },', + '', + ' any: function(expectation) {', + ' return "any character";', + ' },', + '', + ' end: function(expectation) {', + ' return "end of input";', + ' },', + '', + ' other: function(expectation) {', + ' return expectation.description;', + ' }', + ' };', + '', + ' function hex(ch) {', + ' return ch.charCodeAt(0).toString(16).toUpperCase();', + ' }', + '', + ' function literalEscape(s) {', + ' return s', + ' .replace(/\\\\/g, \'\\\\\\\\\')', // backslash + ' .replace(/"/g, \'\\\\"\')', // closing double quote + ' .replace(/\\0/g, \'\\\\0\')', // null + ' .replace(/\\t/g, \'\\\\t\')', // horizontal tab + ' .replace(/\\n/g, \'\\\\n\')', // line feed + ' .replace(/\\r/g, \'\\\\r\')', // carriage return + ' .replace(/[\\x00-\\x0F]/g, function(ch) { return \'\\\\x0\' + hex(ch); })', + ' .replace(/[\\x10-\\x1F\\x7F-\\x9F]/g, function(ch) { return \'\\\\x\' + hex(ch); });', + ' }', '', + ' function classEscape(s) {', ' return s', - ' .replace(/\\\\/g, \'\\\\\\\\\')', // backslash - ' .replace(/"/g, \'\\\\"\')', // closing double quote - ' .replace(/\\0/g, \'\\\\0\')', // null - ' .replace(/\\t/g, \'\\\\t\')', // horizontal tab - ' .replace(/\\n/g, \'\\\\n\')', // line feed - ' .replace(/\\r/g, \'\\\\r\')', // carriage return + ' .replace(/\\\\/g, \'\\\\\\\\\')', // backslash + ' .replace(/\\]/g, \'\\\\]\')', // closing bracket + ' .replace(/\\^/g, \'\\\\^\')', // caret + ' .replace(/-/g, \'\\\\-\')', // dash + ' .replace(/\\0/g, \'\\\\0\')', // null + ' .replace(/\\t/g, \'\\\\t\')', // horizontal tab + ' .replace(/\\n/g, \'\\\\n\')', // line feed + ' .replace(/\\r/g, \'\\\\r\')', // carriage return ' .replace(/[\\x00-\\x0F]/g, function(ch) { return \'\\\\x0\' + hex(ch); })', ' .replace(/[\\x10-\\x1F\\x7F-\\x9F]/g, function(ch) { return \'\\\\x\' + hex(ch); });', ' }', '', + ' function describeExpectation(expectation) {', + ' return DESCRIBE_EXPECTATION_FNS[expectation.type](expectation);', + ' }', + '', ' var expectedDescs = new Array(expected.length),', ' expectedDesc, foundDesc, i, j;', '', ' for (i = 0; i < expected.length; i++) {', - ' expectedDescs[i] = expected[i].description;', + ' expectedDescs[i] = describeExpectation(expected[i]);', ' }', '', ' expectedDescs.sort();', @@ -1085,7 +1136,7 @@ function generateJS(ast, options) { ' + expectedDescs[expectedDescs.length - 1]', ' : expectedDescs[0];', '', - ' foundDesc = found ? "\\"" + escape(found) + "\\"" : "end of input";', + ' foundDesc = found ? "\\"" + literalEscape(found) + "\\"" : "end of input";', '', ' return "Expected " + expectedDesc + " but " + foundDesc + " found.";', ' }', @@ -1127,7 +1178,7 @@ function generateJS(ast, options) { ' return peg$result;', ' } else {', ' if (peg$result !== peg$FAILED && peg$currPos < input.length) {', - ' peg$fail({ type: "end", description: "end of input" });', + ' peg$fail({ type: "end" });', ' }', '', ' throw peg$buildException(', diff --git a/spec/behavior/generated-parser-behavior.spec.js b/spec/behavior/generated-parser-behavior.spec.js index fdec845..eee64c5 100644 --- a/spec/behavior/generated-parser-behavior.spec.js +++ b/spec/behavior/generated-parser-behavior.spec.js @@ -190,7 +190,7 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = "a"'); expect(parser).toFailToParse("b", { - expected: [{ type: "literal", text: "a", ignoreCase: false, description: '"a"' }] + expected: [{ type: "literal", text: "a", ignoreCase: false }] }); }); }); @@ -271,7 +271,7 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = "a"', options); expect(parser).toFailToParse("b", { - expected: [{ type: "literal", text: "a", ignoreCase: false, description: '"a"' }] + expected: [{ type: "literal", text: "a", ignoreCase: false }] }); }); }); @@ -344,7 +344,7 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = [a]', options); expect(parser).toFailToParse("b", { - expected: [{ type: "class", parts: ["a"], inverted: false, ignoreCase: false, description: "[a]" }] + expected: [{ type: "class", parts: ["a"], inverted: false, ignoreCase: false }] }); }); }); @@ -380,7 +380,7 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = .', options); expect(parser).toFailToParse("", { - expected: [{ type: "any", description: "any character" }] + expected: [{ type: "any" }] }); }); }); @@ -932,8 +932,8 @@ describe("generated parser behavior", function() { expect(parser).toFailToParse("d", { expected: [ - { type: "literal", text: "a", ignoreCase: false, description: '"a"' }, - { type: "literal", text: "c", ignoreCase: false, description: '"c"' } + { type: "literal", text: "a", ignoreCase: false }, + { type: "literal", text: "c", ignoreCase: false } ] }); }); @@ -967,8 +967,8 @@ describe("generated parser behavior", function() { expect(parser).toFailToParse("b", { expected: [ - { type: "literal", text: "a", ignoreCase: false, description: '"a"' }, - { type: "literal", text: "c", ignoreCase: false, description: '"c"' } + { type: "literal", text: "a", ignoreCase: false }, + { type: "literal", text: "c", ignoreCase: false } ] }); }); @@ -1331,7 +1331,7 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = "a" "b" / "a" "c" "d"', options); expect(parser).toFailToParse("ace", { - expected: [{ type: "literal", text: "d", ignoreCase: false, description: '"d"' }] + expected: [{ type: "literal", text: "d", ignoreCase: false }] }); }); }); @@ -1341,7 +1341,7 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = "a"', options); expect(parser).toFailToParse("ab", { - expected: [{ type: "end", description: "end of input" }] + expected: [{ type: "end" }] }); }); @@ -1349,7 +1349,7 @@ describe("generated parser behavior", function() { var parser = peg.generate('start = "a"', options); expect(parser).toFailToParse("b", { - expected: [{ type: "literal", text: "a", ignoreCase: false, description: '"a"' }] + expected: [{ type: "literal", text: "a", ignoreCase: false }] }); }); @@ -1358,9 +1358,9 @@ describe("generated parser behavior", function() { expect(parser).toFailToParse("d", { expected: [ - { type: "literal", text: "a", ignoreCase: false, description: '"a"' }, - { type: "literal", text: "b", ignoreCase: false, description: '"b"' }, - { type: "literal", text: "c", ignoreCase: false, description: '"c"' } + { type: "literal", text: "a", ignoreCase: false }, + { type: "literal", text: "b", ignoreCase: false }, + { type: "literal", text: "c", ignoreCase: false } ] }); }); diff --git a/spec/unit/compiler/passes/generate-bytecode.spec.js b/spec/unit/compiler/passes/generate-bytecode.spec.js index a73effd..95592ce 100644 --- a/spec/unit/compiler/passes/generate-bytecode.spec.js +++ b/spec/unit/compiler/passes/generate-bytecode.spec.js @@ -35,11 +35,11 @@ describe("compiler pass |generateBytecode|", function() { 'c = "c"' ].join("\n"), constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }', + '{ type: "literal", text: "a", ignoreCase: false }', '"b"', - '{ type: "literal", text: "b", ignoreCase: false, description: "\\"b\\"" }', + '{ type: "literal", text: "b", ignoreCase: false }', '"c"', - '{ type: "literal", text: "c", ignoreCase: false, description: "\\"c\\"" }' + '{ type: "literal", text: "c", ignoreCase: false }' ])); }); }); @@ -69,7 +69,7 @@ describe("compiler pass |generateBytecode|", function() { expect(pass).toChangeAST(grammar, constsDetails([ '{ type: "other", description: "start" }', '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }' + '{ type: "literal", text: "a", ignoreCase: false }' ])); }); }); @@ -106,7 +106,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }', + '{ type: "literal", text: "a", ignoreCase: false }', 'function() { code }' ])); }); @@ -129,7 +129,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }', + '{ type: "literal", text: "a", ignoreCase: false }', 'function(a) { code }' ])); }); @@ -165,11 +165,11 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }', + '{ type: "literal", text: "a", ignoreCase: false }', '"b"', - '{ type: "literal", text: "b", ignoreCase: false, description: "\\"b\\"" }', + '{ type: "literal", text: "b", ignoreCase: false }', '"c"', - '{ type: "literal", text: "c", ignoreCase: false, description: "\\"c\\"" }', + '{ type: "literal", text: "c", ignoreCase: false }', 'function(a, b, c) { code }' ])); }); @@ -205,11 +205,11 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }', + '{ type: "literal", text: "a", ignoreCase: false }', '"b"', - '{ type: "literal", text: "b", ignoreCase: false, description: "\\"b\\"" }', + '{ type: "literal", text: "b", ignoreCase: false }', '"c"', - '{ type: "literal", text: "c", ignoreCase: false, description: "\\"c\\"" }' + '{ type: "literal", text: "c", ignoreCase: false }' ])); }); }); @@ -257,7 +257,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }' + '{ type: "literal", text: "a", ignoreCase: false }' ])); }); }); @@ -284,7 +284,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }' + '{ type: "literal", text: "a", ignoreCase: false }' ])); }); }); @@ -304,7 +304,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }' + '{ type: "literal", text: "a", ignoreCase: false }' ])); }); }); @@ -326,7 +326,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }' + '{ type: "literal", text: "a", ignoreCase: false }' ])); }); }); @@ -352,7 +352,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }' + '{ type: "literal", text: "a", ignoreCase: false }' ])); }); }); @@ -429,11 +429,11 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }', + '{ type: "literal", text: "a", ignoreCase: false }', '"b"', - '{ type: "literal", text: "b", ignoreCase: false, description: "\\"b\\"" }', + '{ type: "literal", text: "b", ignoreCase: false }', '"c"', - '{ type: "literal", text: "c", ignoreCase: false, description: "\\"c\\"" }', + '{ type: "literal", text: "c", ignoreCase: false }', 'function(a, b, c) { code }' ])); }); @@ -504,11 +504,11 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }', + '{ type: "literal", text: "a", ignoreCase: false }', '"b"', - '{ type: "literal", text: "b", ignoreCase: false, description: "\\"b\\"" }', + '{ type: "literal", text: "b", ignoreCase: false }', '"c"', - '{ type: "literal", text: "c", ignoreCase: false, description: "\\"c\\"" }', + '{ type: "literal", text: "c", ignoreCase: false }', 'function(a, b, c) { code }' ])); }); @@ -560,7 +560,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "a", ignoreCase: false, description: "\\"a\\"" }' + '{ type: "literal", text: "a", ignoreCase: false }' ])); }); }); @@ -579,7 +579,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST(grammar, constsDetails([ '"a"', - '{ type: "literal", text: "A", ignoreCase: true, description: "\\"A\\"" }' + '{ type: "literal", text: "A", ignoreCase: true }' ])); }); }); @@ -598,7 +598,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST('start = [a]', constsDetails([ '/^[a]/', - '{ type: "class", parts: ["a"], inverted: false, ignoreCase: false, description: "[a]" }' + '{ type: "class", parts: ["a"], inverted: false, ignoreCase: false }' ])); }); }); @@ -607,7 +607,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST('start = [^a]', constsDetails([ '/^[^a]/', - '{ type: "class", parts: ["a"], inverted: true, ignoreCase: false, description: "[^a]" }' + '{ type: "class", parts: ["a"], inverted: true, ignoreCase: false }' ])); }); }); @@ -616,7 +616,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST('start = [a]i', constsDetails([ '/^[a]/i', - '{ type: "class", parts: ["a"], inverted: false, ignoreCase: true, description: "[a]" }' + '{ type: "class", parts: ["a"], inverted: false, ignoreCase: true }' ])); }); }); @@ -625,7 +625,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST('start = [ab-def-hij-l]', constsDetails([ '/^[ab-def-hij-l]/', - '{ type: "class", parts: ["a", ["b", "d"], "e", ["f", "h"], "i", ["j", "l"]], inverted: false, ignoreCase: false, description: "[ab-def-hij-l]" }' + '{ type: "class", parts: ["a", ["b", "d"], "e", ["f", "h"], "i", ["j", "l"]], inverted: false, ignoreCase: false }' ])); }); }); @@ -634,7 +634,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST('start = []', constsDetails([ '/^(?!)/', - '{ type: "class", parts: [], inverted: false, ignoreCase: false, description: "[]" }' + '{ type: "class", parts: [], inverted: false, ignoreCase: false }' ])); }); }); @@ -643,7 +643,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST('start = [^]', constsDetails([ '/^[\\S\\s]/', - '{ type: "class", parts: [], inverted: true, ignoreCase: false, description: "[^]" }' + '{ type: "class", parts: [], inverted: true, ignoreCase: false }' ])); }); }); @@ -663,7 +663,7 @@ describe("compiler pass |generateBytecode|", function() { it("defines correct constants", function() { expect(pass).toChangeAST( grammar, - constsDetails(['{ type: "any", description: "any character" }']) + constsDetails(['{ type: "any" }']) ); }); });