From df154daafb9c6c952351493af02d3a55e0b05c59 Mon Sep 17 00:00:00 2001 From: David Majda Date: Wed, 26 Mar 2014 20:04:49 +0100 Subject: [PATCH] PEG.js grammar: Disallow empty sequences Empty sequences are useless and they only confused users. Let's disallow them. --- lib/compiler/passes/generate-bytecode.js | 26 ++--- lib/parser.js | 20 +++- .../compiler/passes/generate-bytecode.spec.js | 103 ++++++++---------- spec/generated-parser.spec.js | 18 +-- spec/parser.spec.js | 11 -- src/parser.pegjs | 4 +- 6 files changed, 75 insertions(+), 107 deletions(-) diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js index 407caa3..2ba346a 100644 --- a/lib/compiler/passes/generate-bytecode.js +++ b/lib/compiler/passes/generate-bytecode.js @@ -351,8 +351,6 @@ module.exports = function(ast) { }, sequence: function(node, context) { - var emptyArrayIndex; - function buildElementsCode(elements, context) { var processedCount, functionIndex; @@ -402,22 +400,16 @@ module.exports = function(ast) { } } - if (node.elements.length > 0) { - failedIndex = addConst('peg$FAILED'); - - return buildSequence( - [op.PUSH_CURR_POS], - buildElementsCode(node.elements, { - sp: context.sp + 1, - env: context.env, - action: context.action - }) - ); - } else { - emptyArrayIndex = addConst('[]'); + failedIndex = addConst('peg$FAILED'); - return [op.PUSH, emptyArrayIndex]; - } + return buildSequence( + [op.PUSH_CURR_POS], + buildElementsCode(node.elements, { + sp: context.sp + 1, + env: context.env, + action: context.action + }) + ); }, labeled: function(node, context) { diff --git a/lib/parser.js b/lib/parser.js index 3de9d26..5d5c861 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -676,9 +676,13 @@ module.exports = (function() { s0 = peg$currPos; s1 = []; s2 = peg$parselabeled(); - while (s2 !== peg$FAILED) { - s1.push(s2); - s2 = peg$parselabeled(); + if (s2 !== peg$FAILED) { + while (s2 !== peg$FAILED) { + s1.push(s2); + s2 = peg$parselabeled(); + } + } else { + s1 = peg$c0; } if (s1 !== peg$FAILED) { s2 = peg$parseaction(); @@ -698,9 +702,13 @@ module.exports = (function() { s0 = peg$currPos; s1 = []; s2 = peg$parselabeled(); - while (s2 !== peg$FAILED) { - s1.push(s2); - s2 = peg$parselabeled(); + if (s2 !== peg$FAILED) { + while (s2 !== peg$FAILED) { + s1.push(s2); + s2 = peg$parselabeled(); + } + } else { + s1 = peg$c0; } if (s1 !== peg$FAILED) { peg$reportedPos = s0; diff --git a/spec/compiler/passes/generate-bytecode.spec.js b/spec/compiler/passes/generate-bytecode.spec.js index 95d4099..d1523a0 100644 --- a/spec/compiler/passes/generate-bytecode.spec.js +++ b/spec/compiler/passes/generate-bytecode.spec.js @@ -85,24 +85,25 @@ describe("compiler pass |generateBytecode|", function() { describe("for action", function() { describe("without labels", function() { - var grammar = 'start = { code }'; + var grammar = 'start = "a" { code }'; it("generates correct bytecode", function() { expect(pass).toChangeAST(grammar, bytecodeDetails([ - 1, // PUSH_CURR_POS - 0, 0, // - 11, 6, 0, // IF_NOT_ERROR - 20, 1, // * REPORT_SAVED_POS - 22, 1, 1, 0, // CALL - 5 // NIP + 1, // PUSH_CURR_POS + 14, 0, 2, 2, 18, 0, 19, 1, // + 11, 6, 0, // IF_NOT_ERROR + 20, 1, // * REPORT_SAVED_POS + 22, 2, 1, 0, // CALL + 5 // NIP ])); }); it("defines correct constants", function() { - expect(pass).toChangeAST( - grammar, - constsDetails(['[]', 'function() { code }']) - ); + expect(pass).toChangeAST(grammar, constsDetails([ + '"a"', + '{ type: "literal", value: "a", description: "\\"a\\"" }', + 'function() { code }' + ])); }); }); @@ -171,57 +172,41 @@ describe("compiler pass |generateBytecode|", function() { }); describe("for sequence", function() { - describe("empty", function() { - var grammar = 'start = '; - - it("generates correct bytecode", function() { - expect(pass).toChangeAST(grammar, bytecodeDetails([ - 0, 0 // PUSH - ])); - }); + var grammar = 'start = "a" "b" "c"'; - it("defines correct constants", function() { - expect(pass).toChangeAST(grammar, constsDetails(['[]'])); - }); + it("generates correct bytecode", function() { + expect(pass).toChangeAST(grammar, bytecodeDetails([ + 1, // PUSH_CURR_POS + 14, 1, 2, 2, 18, 1, 19, 2, // + 11, 35, 4, // IF_NOT_ERROR + 14, 3, 2, 2, 18, 3, 19, 4, // * + 11, 19, 5, // IF_NOT_ERROR + 14, 5, 2, 2, 18, 5, 19, 6, // * + 11, 3, 5, // IF_NOT_ERROR + 7, 3, // * WRAP + 5, // NIP + 4, 3, // * POP_N + 3, // POP_CURR_POS + 0, 0, // PUSH + 4, 2, // * POP_N + 3, // POP_CURR_POS + 0, 0, // PUSH + 2, // * POP + 3, // POP_CURR_POS + 0, 0 // PUSH + ])); }); - describe("non-empty", function() { - var grammar = 'start = "a" "b" "c"'; - - it("generates correct bytecode", function() { - expect(pass).toChangeAST(grammar, bytecodeDetails([ - 1, // PUSH_CURR_POS - 14, 1, 2, 2, 18, 1, 19, 2, // - 11, 35, 4, // IF_NOT_ERROR - 14, 3, 2, 2, 18, 3, 19, 4, // * - 11, 19, 5, // IF_NOT_ERROR - 14, 5, 2, 2, 18, 5, 19, 6, // * - 11, 3, 5, // IF_NOT_ERROR - 7, 3, // * WRAP - 5, // NIP - 4, 3, // * POP_N - 3, // POP_CURR_POS - 0, 0, // PUSH - 4, 2, // * POP_N - 3, // POP_CURR_POS - 0, 0, // PUSH - 2, // * POP - 3, // POP_CURR_POS - 0, 0 // PUSH - ])); - }); - - it("defines correct constants", function() { - expect(pass).toChangeAST(grammar, constsDetails([ - 'peg$FAILED', - '"a"', - '{ type: "literal", value: "a", description: "\\"a\\"" }', - '"b"', - '{ type: "literal", value: "b", description: "\\"b\\"" }', - '"c"', - '{ type: "literal", value: "c", description: "\\"c\\"" }' - ])); - }); + it("defines correct constants", function() { + expect(pass).toChangeAST(grammar, constsDetails([ + 'peg$FAILED', + '"a"', + '{ type: "literal", value: "a", description: "\\"a\\"" }', + '"b"', + '{ type: "literal", value: "b", description: "\\"b\\"" }', + '"c"', + '{ type: "literal", value: "c", description: "\\"c\\"" }' + ])); }); }); diff --git a/spec/generated-parser.spec.js b/spec/generated-parser.spec.js index 4695474..daa7f35 100644 --- a/spec/generated-parser.spec.js +++ b/spec/generated-parser.spec.js @@ -393,13 +393,7 @@ describe("generated parser", function() { }); describe("sequence matching", function() { - it("matches empty sequence correctly", function() { - var parser = PEG.buildParser('start = ', options); - - expect(parser).toParse("", []); - }); - - it("matches non-empty sequence correctly", function() { + it("matches correctly", function() { var parser = PEG.buildParser('start = "a" "b" "c"', options); expect(parser).toParse("abc", ["a", "b", "c"]); @@ -845,9 +839,9 @@ describe("generated parser", function() { describe("expectations reporting", function() { it("reports expectations correctly with no alternative", function() { - var parser = PEG.buildParser('start = ', options); + var parser = PEG.buildParser('start = "a"', options); - expect(parser).toFailToParse("a", { + expect(parser).toFailToParse("ab", { expected: [{ type: "end", description: "end of input" }] }); }); @@ -916,10 +910,10 @@ describe("generated parser", function() { describe("message building", function() { it("builds message correctly with no alternative", function() { - var parser = PEG.buildParser('start = ', options); + var parser = PEG.buildParser('start = "a"', options); - expect(parser).toFailToParse("a", { - message: 'Expected end of input but "a" found.' + expect(parser).toFailToParse("ab", { + message: 'Expected end of input but "b" found.' }); }); diff --git a/spec/parser.spec.js b/spec/parser.spec.js index cd30efe..bb5235b 100644 --- a/spec/parser.spec.js +++ b/spec/parser.spec.js @@ -8,7 +8,6 @@ describe("PEG.js grammar parser", function() { labeledAbcd = { type: "labeled", label: "a", expression: literalAbcd }, labeledEfgh = { type: "labeled", label: "b", expression: literalEfgh }, labeledIjkl = { type: "labeled", label: "c", expression: literalIjkl }, - sequenceEmpty = { type: "sequence", elements: [] }, sequenceOfLiterals = { type: "sequence", elements: [literalAbcd, literalEfgh, literalIjkl] @@ -235,13 +234,6 @@ describe("PEG.js grammar parser", function() { /* Canonical sequence is "\"abcd\" \"efgh\" \"ijkl\"". */ it("parses sequence", function() { - expect('start = { code }').toParseAs( - oneRuleGrammar({ - type: "action", - expression: sequenceEmpty, - code: " code " - }) - ); expect('start = a:"abcd" { code }').toParseAs( oneRuleGrammar({ type: "action", expression: labeledAbcd, code: " code " }) ); @@ -253,9 +245,6 @@ describe("PEG.js grammar parser", function() { }) ); - expect('start = ').toParseAs( - oneRuleGrammar(sequenceEmpty) - ); expect('start = a:"abcd"').toParseAs( oneRuleGrammar(labeledAbcd) ); diff --git a/src/parser.pegjs b/src/parser.pegjs index b9efa6c..199935a 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -54,7 +54,7 @@ choice } sequence - = elements:labeled* code:action { + = elements:labeled+ code:action { var expression = elements.length !== 1 ? { type: "sequence", @@ -67,7 +67,7 @@ sequence code: code }; } - / elements:labeled* { + / elements:labeled+ { return elements.length !== 1 ? { type: "sequence",