Refine error handling further

Before this commit, the |expected| and |error| functions didn't halt the parsing immediately, but triggered a regular match failure. After they were called, the parser could backtrack, try another branches, and only if no other branch succeeded, it triggered an exception with information possibly based on parameters passed to the |expected| or |error| function (this depended on positions where failures in other branches have occurred). While nice in theory, this solution didn't work well in practice. There were at least two problems: 1. Action expression could have easily triggered a match failure later in the input than the action itself. This resulted in the action-triggered failure to be shadowed by the expression-triggered one. Consider the following example: integer = digits:[0-9]+ { var result = parseInt(digits.join(""), 10); if (result % 2 === 0) { error("The number must be an odd integer."); return; } return result; } Given input "2", the |[0-9]+| expression would record a match failure at position 1 (an unsuccessful attempt to parse yet another digit after "2"). However, a failure triggered by the |error| call would occur at position 0. This problem could have been solved by silencing match failures in action expressions, but that would lead to severe performance problems (yes, I tried and measured). Other possible solutions are hacks which I didn't want to introduce into PEG.js. 2. Triggering a match failure in action code could have lead to unexpected backtracking. Consider the following example: class = "[" (charRange / char)* "]" charRange = begin:char "-" end:char { if (begin.data.charCodeAt(0) > end.data.charCodeAt(0)) { error("Invalid character range: " + begin + "-" + end + "."); } // ... } char = [a-zA-Z0-9_\-] Given input "[b-a]", the |charRange| rule would fail, but the parser would try the |char| rule and succeed repeatedly, resulting in "b-a" being parsed as a sequence of three |char|'s, which it is not. This problem could have been solved by using negative predicates, but that would complicate the grammar and still wouldn't get rid of unintuitive behavior. Given these problems I decided to change the semantics of the |expected| and |error| functions. They don't interact with regular match failure mechanism anymore, but they cause and immediate parse failure by throwing an exception. I think this is more intuitive behavior with less harmful side effects. The disadvantage of the new approach is that one can't backtrack from an action-triggered error. I don't see this as a big deal as I think this will be rarely needed and one can always use a semantic predicate as a workaround. Speed impact ------------ Before: 993.84 kB/s After: 998.05 kB/s Difference: 0.42% Size impact ----------- Before: 1019968 b After: 975434 b Difference: -4.37% (Measured by /tools/impact with Node.js v0.6.18 on x86_64 GNU/Linux.)
11 years ago · 2f2152204a
parent 9fa7301aec
commit 2f2152204a
10 changed files with 206 additions and 462 deletions
--- a/1
+++ b/1
@ -9,7 +9,6 @@ MODULES = utils                                 \
          grammar-error                         \
          parser                                \
          compiler/opcodes                      \
-          compiler/flags                        \
          compiler/passes/generate-bytecode     \
          compiler/passes/generate-javascript   \
          compiler/passes/remove-proxy-rules    \
--- a/README.md
+++ b/README.md
@ -376,14 +376,14 @@ expression as its arguments. The action should return some JavaScript value
 using the `return` statement. This value is considered match result of the
 preceding expression.

-To indicate a match failure, the code inside the action can invoke the
-`expected` function. It takes one parameter — a description of what was expected
-at the current position. This description will be used as part of a message of
-the exception thrown if the match failure leads to an parse error.
-
-The code inside an action can also invoke the `error` function. It takes one
-parameter — an error message. This message will be used by the exception thrown
-if the match failure leads to an parse error.
+To indicate an error, the code inside the action can invoke the `expected`
+function, which makes the parser throw an exception. The function takes one
+parameter — a description of what was expected at the current position. This
+description will be used as part of a message of the thrown exception.
+
+The code inside an action can also invoke the `error` function, which also makes
+the parser throw an exception. The function takes one parameter — an error
+message. This message will be used by the thrown exception.

 The code inside the action can access all variables and functions defined in the
 initializer at the beginning of the grammar. Curly braces in the action code
--- a/lib/compiler/flags.js
+++ b/lib/compiler/flags.js
@ -1,6 +0,0 @@
-/* Bytecode instruction flags. */
-module.exports = {
-  DONT_CHECK_FAILS: 0,
-  CHECK_FAILS:      1
-};
-
--- a/lib/compiler/passes/generate-bytecode.js
+++ b/lib/compiler/passes/generate-bytecode.js
@ -1,6 +1,5 @@
 var utils = require("../../utils"),
-    op    = require("../opcodes"),
-    flags = require("../flags");
+    op    = require("../opcodes");

 /* Generates bytecode.
 *
@ -152,9 +151,9 @@ var utils = require("../../utils"),
 *
 *        reportedPos = currPos;
 *
- * [23] CALL f, c, n, pc, p1, p2, ..., pN
+ * [23] CALL f, n, pc, p1, p2, ..., pN
 *
- *        value = call(consts[f], c, stack[p1], ..., stack[pN]);
+ *        value = consts[f](stack[p1], ..., stack[pN]);
 *        stack.pop(n);
 *        stack.push(value);
 *
@ -207,16 +206,10 @@ module.exports = function(ast, options) {
    return condCode.concat([bodyCode.length], bodyCode);
  }

-  function buildCall(functionIndex, checkFails, delta, env, sp) {
+  function buildCall(functionIndex, delta, env, sp) {
    var params = utils.map( utils.values(env), function(p) { return sp - p; });

-    return [
-      op.CALL,
-      functionIndex,
-      checkFails,
-      delta,
-      params.length
-    ].concat(params);
+    return [op.CALL, functionIndex, delta, params.length].concat(params);
  }

  function buildSimplePredicate(expression, negative, context) {
@ -255,13 +248,7 @@ module.exports = function(ast, options) {

    return buildSequence(
      [op.REPORT_CURR_POS],
-      buildCall(
-        functionIndex,
-        flags.DONT_CHECK_FAILS,
-        0,
-        context.env,
-        context.sp
-      ),
+      buildCall(functionIndex, 0, context.env, context.sp),
      buildCondition(
        [op.IF],
        buildSequence(
@ -360,13 +347,7 @@ module.exports = function(ast, options) {
              [op.IF_NOT_ERROR],
              buildSequence(
                [op.REPORT_SAVED_POS, 1],
-                buildCall(
-                  functionIndex,
-                  flags.CHECK_FAILS,
-                  1,
-                  env,
-                  context.sp + 2
-                )
+                buildCall(functionIndex, 1, env, context.sp + 2)
              ),
              []
            ),
@ -415,7 +396,6 @@ module.exports = function(ast, options) {
              [op.REPORT_SAVED_POS, node.elements.length],
              buildCall(
                functionIndex,
-                flags.CHECK_FAILS,
                node.elements.length,
                context.env,
                context.sp
--- a/lib/compiler/passes/generate-javascript.js
+++ b/lib/compiler/passes/generate-javascript.js
@ -1,6 +1,5 @@
 var utils = require("../../utils"),
-    op    = require("../opcodes"),
-    flags = require("../flags");
+    op    = require("../opcodes");

 /* Generates parser JavaScript code. */
 module.exports = function(ast, options) {
@ -103,7 +102,7 @@ module.exports = function(ast, options) {
    }

    function generateCall() {
-      var baseLength       = 5,
+      var baseLength       = 4,
          paramsLengthCode = 'bc[ip + ' + (baseLength - 1) + ']';

      return [
@ -112,15 +111,11 @@ module.exports = function(ast, options) {
        '  params[i] = stack[stack.length - 1 - params[i]];',
        '}',
        '',
-        'if (bc[ip + 2] === ' + flags.CHECK_FAILS + ') {',
-        '  peg$userFail = false;',
-        '}',
-        'result = peg$consts[bc[ip + 1]].apply(null, params);',
-        'if (bc[ip + 2] === ' + flags.CHECK_FAILS + ') {',
-        '  if (peg$userFail) { result = peg$FAILED; }',
-        '}',
-        '',
-        'stack.splice(stack.length - bc[ip + 3], bc[ip + 3], result);',
+        'stack.splice(',
+        '  stack.length - bc[ip + 2],',
+        '  bc[ip + 2],',
+        '  peg$consts[bc[ip + 1]].apply(null, params)',
+        ');',
        '',
        'ip += ' + baseLength + ' + ' + paramsLengthCode + ';',
        'break;'
@ -145,7 +140,7 @@ module.exports = function(ast, options) {
      '      end   = bc.length,',
      '      ends  = [],',
      '      stack = [],',
-      '      params, result, i;',
+      '      params, i;',
      ''
    ].join('\n'));

@ -277,7 +272,7 @@ module.exports = function(ast, options) {
      '        case ' + op.FAIL + ':',             // FAIL e
      '          stack.push(peg$FAILED);',
      '          if (peg$silentFails === 0) {',
-      '            peg$expected(peg$consts[bc[ip + 1]], peg$currPos);',
+      '            peg$fail(peg$consts[bc[ip + 1]]);',
      '          }',
      '          ip += 2;',
      '          break;',
@ -424,7 +419,7 @@ module.exports = function(ast, options) {
      }

      function compileCall(cond) {
-        var baseLength   = 5,
+        var baseLength   = 4,
            paramsLength = bc[ip + baseLength - 1];

        var value = c(bc[ip + 1]) + '('
@ -433,14 +428,8 @@ module.exports = function(ast, options) {
                  stackIndex
                ).join(', ')
              + ')';
-        stack.pop(bc[ip + 3]);
-        if (bc[ip + 2] === flags.CHECK_FAILS) {
-          parts.push('peg$userFail = false;');
-        }
+        stack.pop(bc[ip + 2]);
        parts.push(stack.push(value));
-        if (bc[ip + 2] === flags.CHECK_FAILS) {
-          parts.push('if (peg$userFail) { ' + stack.top() + ' = peg$FAILED; }');
-        }
        ip += baseLength + paramsLength;
      }

@ -597,7 +586,7 @@ module.exports = function(ast, options) {

          case op.FAIL:             // FAIL e
            parts.push(stack.push('peg$FAILED'));
-            parts.push('if (peg$silentFails === 0) { peg$expected(' + c(bc[ip + 1]) + ', peg$currPos); }');
+            parts.push('if (peg$silentFails === 0) { peg$fail(' + c(bc[ip + 1]) + '); }');
            ip += 2;
            break;

@ -745,9 +734,7 @@ module.exports = function(ast, options) {
    '        peg$cachedPosDetails = { line: 1, column: 1, seenCR: false },',
    '        peg$maxFailPos       = 0,',
    '        peg$maxFailExpected  = [],',
-    '        peg$maxFailMessage   = null,',
    '        peg$silentFails      = 0,', // 0 = report failures, > 0 = silence failures
-    '        peg$userFail         = false,',
    ''
  ].join('\n'));

@ -801,20 +788,15 @@ module.exports = function(ast, options) {
    '    }',
    '',
    '    function expected(description) {',
-    '      if (peg$silentFails === 0) {',
-    '        peg$expected(',
-    '          { type: "other", description: description },',
+    '      throw peg$buildException(',
+    '        null,',
+    '        [{ type: "other", description: description }],',
    '        peg$reportedPos',
    '      );',
    '    }',
-    '      peg$userFail = true;',
-    '    }',
    '',
    '    function error(message) {',
-    '      if (peg$silentFails === 0) {',
-    '        peg$error(message, peg$reportedPos);',
-    '      }',
-    '      peg$userFail = true;',
+    '      throw peg$buildException(message, null, peg$reportedPos);',
    '    }',
    '',
    '    function peg$computePosDetails(pos) {',
@ -850,31 +832,18 @@ module.exports = function(ast, options) {
    '      return peg$cachedPosDetails;',
    '    }',
    '',
-    '    function peg$expected(expected, pos) {',
-    '      if (pos < peg$maxFailPos) { return; }',
+    '    function peg$fail(expected) {',
+    '      if (peg$currPos < peg$maxFailPos) { return; }',
    '',
-    '      if (pos > peg$maxFailPos) {',
-    '        peg$maxFailPos = pos;',
+    '      if (peg$currPos > peg$maxFailPos) {',
+    '        peg$maxFailPos = peg$currPos;',
    '        peg$maxFailExpected = [];',
-    '        peg$maxFailMessage = null;',
    '      }',
    '',
    '      peg$maxFailExpected.push(expected);',
    '    }',
    '',
-    '    function peg$error(message, pos) {',
-    '      if (pos < peg$maxFailPos) { return; }',
-    '',
-    '      if (pos > peg$maxFailPos) {',
-    '        peg$maxFailPos = pos;',
-    '        peg$maxFailExpected = [];',
-    '        peg$maxFailMessage = null;',
-    '      }',
-    '',
-    '      peg$maxFailMessage = message;',
-    '    }',
-    '',
-    '    function peg$buildException() {',
+    '    function peg$buildException(message, expected, pos) {',
    '      function cleanupExpected(expected) {',
    '        var i = 1;',
    '',
@ -888,8 +857,13 @@ module.exports = function(ast, options) {
    '          }',
    '        });',
    '',
+    /*
+     * This works because the bytecode generator guarantees that every
+     * expectation object exists only once, so it's enough to use |===| instead
+     * of deeper structural comparison.
+     */
    '        while (i < expected.length) {',
-    '          if (expected[i - 1].description === expected[i].description) {',
+    '          if (expected[i - 1] === expected[i]) {',
    '            expected.splice(i, 1);',
    '          } else {',
    '            i++;',
@ -953,20 +927,15 @@ module.exports = function(ast, options) {
    '        return "Expected " + expectedDesc + " but " + foundDesc + " found.";',
    '      }',
    '',
-    '      var pos        = Math.max(peg$currPos, peg$maxFailPos),',
-    '          posDetails = peg$computePosDetails(pos),',
-    '          expected   = peg$maxFailMessage === null ? peg$maxFailExpected : null,',
-    '          found      = pos < input.length ? input.charAt(pos) : null,',
-    '          message    = peg$maxFailMessage !== null',
-    '            ? peg$maxFailMessage',
-    '            : buildMessage(expected, found);',
+    '      var posDetails = peg$computePosDetails(pos),',
+    '          found      = pos < input.length ? input.charAt(pos) : null;',
    '',
    '      if (expected !== null) {',
    '        cleanupExpected(expected);',
    '      }',
    '',
    '      return new SyntaxError(',
-    '        message,',
+    '        message !== null ? message : buildMessage(expected, found),',
    '        expected,',
    '        found,',
    '        pos,',
@ -1003,7 +972,11 @@ module.exports = function(ast, options) {
    '    if (peg$result !== peg$FAILED && peg$currPos === input.length) {',
    '      return peg$result;',
    '    } else {',
-    '      throw peg$buildException();',
+    '      throw peg$buildException(',
+    '        null,',
+    '        peg$maxFailExpected,',
+    '        Math.max(peg$currPos, peg$maxFailPos)',
+    '      );',
    '    }',
    '  }',
    '',
--- a/lib/parser.js
+++ b/lib/parser.js
--- a/package.json
+++ b/package.json
@ -20,7 +20,6 @@
    "examples/json.pegjs",
    "lib/compiler.js",
    "lib/compiler/opcodes.js",
-    "lib/compiler/flags.js",
    "lib/compiler/passes/generate-bytecode.js",
    "lib/compiler/passes/generate-javascript.js",
    "lib/compiler/passes/remove-proxy-rules.js",
--- a/spec/compiler/passes/generate-bytecode.spec.js
+++ b/spec/compiler/passes/generate-bytecode.spec.js
@ -91,9 +91,9 @@ describe("compiler pass |generateBytecode|", function() {
        expect(pass).toChangeAST(grammar, bytecodeDetails([
          1,             // PUSH_CURR_POS
          0, 0,          // PUSH
-          12, 7, 0,         // IF_NOT_ERROR
+          12, 6, 0,      // IF_NOT_ERROR
          21, 1,         //   * REPORT_SAVED_POS
-          23, 1, 1, 1, 0,   //     CALL
+          23, 1, 1, 0,   //     CALL
          11, 1, 1,      // IF_ERROR
          6,             //   * NIP_CURR_POS
          5              //   * NIP
@ -115,9 +115,9 @@ describe("compiler pass |generateBytecode|", function() {
        expect(pass).toChangeAST(grammar, bytecodeDetails([
          1,                           // PUSH_CURR_POS
          15, 0, 2, 2, 19, 0, 20, 1,   // <expression>
-          12, 8, 0,                    // IF_NOT_ERROR
+          12, 7, 0,                    // IF_NOT_ERROR
          21, 1,                       //   * REPORT_SAVED_POS
-          23, 2, 1, 1, 1, 0,           //     CALL
+          23, 2, 1, 1, 0,              //     CALL
          11, 1, 1,                    // IF_ERROR
          6,                           //   * NIP_CURR_POS
          5                            //   * NIP
@ -139,13 +139,13 @@ describe("compiler pass |generateBytecode|", function() {
        expect(pass).toChangeAST(grammar, bytecodeDetails([
          1,                           // PUSH_CURR_POS
          15, 1, 2, 2, 19, 1, 20, 2,   // <elements[0]>
-          12, 47, 4,                   // IF_NOT_ERROR
+          12, 46, 4,                   // IF_NOT_ERROR
          15, 3, 2, 2, 19, 3, 20, 4,   //   * <elements[1]>
-          12, 31, 5,                   //     IF_NOT_ERROR
+          12, 30, 5,                   //     IF_NOT_ERROR
          15, 5, 2, 2, 19, 5, 20, 6,   //       * <elements[2]>
-          12, 15, 5,                   //         IF_NOT_ERROR
+          12, 14, 5,                   //         IF_NOT_ERROR
          21, 3,                       //           * REPORT_SAVED_POS
-          23, 7, 1, 3, 3, 2, 1, 0,     //             CALL
+          23, 7, 3, 3, 2, 1, 0,        //             CALL
          11, 1, 1,                    //             IF_ERROR
          6,                           //               * NIP_CURR_POS
          5,                           //               * NIP
@ -316,7 +316,7 @@ describe("compiler pass |generateBytecode|", function() {
      it("generates correct bytecode", function() {
        expect(pass).toChangeAST(grammar, bytecodeDetails([
          22,            // REPORT_CURR_POS
-          23, 0, 0, 0, 0,   // CALL
+          23, 0, 0, 0,   // CALL
          10, 3, 3,      // IF
          2,             //   * POP
          0, 1,          //     PUSH
@ -340,13 +340,13 @@ describe("compiler pass |generateBytecode|", function() {
        expect(pass).toChangeAST(grammar, bytecodeDetails([
          1,                           // PUSH_CURR_POS
          15, 1, 2, 2, 19, 1, 20, 2,   // <elements[0]>
-          12, 61, 4,                   // IF_NOT_ERROR
+          12, 60, 4,                   // IF_NOT_ERROR
          15, 3, 2, 2, 19, 3, 20, 4,   //   * <elements[1]>
-          12, 45, 5,                   //     IF_NOT_ERROR
+          12, 44, 5,                   //     IF_NOT_ERROR
          15, 5, 2, 2, 19, 5, 20, 6,   //       * <elements[2]>
-          12, 29, 5,                   //         IF_NOT_ERROR
+          12, 28, 5,                   //         IF_NOT_ERROR
          22,                          //           * REPORT_CURR_POS
-          23, 7, 0, 0, 3, 2, 1, 0,     //             CALL
+          23, 7, 0, 3, 2, 1, 0,        //             CALL
          10, 3, 3,                    //             IF
          2,                           //               * POP
          0, 8,                        //                 PUSH
@ -393,7 +393,7 @@ describe("compiler pass |generateBytecode|", function() {
      it("generates correct bytecode", function() {
        expect(pass).toChangeAST(grammar, bytecodeDetails([
          22,            // REPORT_CURR_POS
-          23, 0, 0, 0, 0,   // CALL_PREDICATE
+          23, 0, 0, 0,   // CALL_PREDICATE
          10, 3, 3,      // IF
          2,             //   * POP
          0, 2,          //     PUSH
@ -417,13 +417,13 @@ describe("compiler pass |generateBytecode|", function() {
        expect(pass).toChangeAST(grammar, bytecodeDetails([
          1,                           // PUSH_CURR_POS
          15, 1, 2, 2, 19, 1, 20, 2,   // <elements[0]>
-          12, 61, 4,                   // IF_NOT_ERROR
+          12, 60, 4,                   // IF_NOT_ERROR
          15, 3, 2, 2, 19, 3, 20, 4,   //   * <elements[1]>
-          12, 45, 5,                   //     IF_NOT_ERROR
+          12, 44, 5,                   //     IF_NOT_ERROR
          15, 5, 2, 2, 19, 5, 20, 6,   //       * <elements[2]>
-          12, 29, 5,                   //         IF_NOT_ERROR
+          12, 28, 5,                   //         IF_NOT_ERROR
          22,                          //           * REPORT_CURR_POS
-          23, 7, 0, 0, 3, 2, 1, 0,     //             CALL
+          23, 7, 0, 3, 2, 1, 0,        //             CALL
          10, 3, 3,                    //             IF
          2,                           //               * POP
          0, 0,                        //                 PUSH
--- a/spec/generated-parser.spec.js
+++ b/spec/generated-parser.spec.js
@ -337,117 +337,35 @@ describe("generated parser", function() {
        expect(parser).toParse("a", 42);
      });

-      describe("|expected| function", function() {
-        it("generates a regular match failure", function() {
+      it("can use the |expected| function to trigger an error", function() {
        var parser = PEG.buildParser(
              'start = "a" { expected("a"); }',
              options
            );

        expect(parser).toFailToParse("a", {
-            offset:   0,
-            line:     1,
-            column:   1,
+          message:  'Expected a but "a" found.',
          expected: [{ type: "other", description: "a" }],
          found:    "a",
-            message:  'Expected a but "a" found.'
-          });
-        });
-
-        it("generated failures combine with failures generated before", function() {
-          var parser = PEG.buildParser(
-                'start = "a" / ("b" { expected("b"); })',
-                options
-              );
-
-          expect(parser).toFailToParse("b", {
-            expected: [
-              { type: "literal", value: "a", description: '"a"' },
-              { type: "other", description: "b" }
-            ]
-          });
-        });
-
-        it("generated failures combine with failures generated after", function() {
-          var parser = PEG.buildParser(
-                'start = ("a" { expected("a"); }) / "b"',
-                options
-              );
-
-          expect(parser).toFailToParse("a", {
-            expected: [
-              { type: "literal", value: "b", description: '"b"' },
-              { type: "other", description: "a" }
-            ]
-          });
-        });
-
-        it("multiple invocations generate additional failures", function() {
-          var parser = PEG.buildParser(
-                'start = "a" { expected("a1"); expected("a2"); }',
-                options
-              );
-
-          expect(parser).toFailToParse("a", {
-            expected: [
-              { type: "other", description: "a1" },
-              { type: "other", description: "a2" }
-            ]
-          });
-        });
-      });
-
-      describe("|error| function", function() {
-        it("generates a custom match failure", function() {
-          var parser = PEG.buildParser(
-                'start = "a" { error("a"); }',
-                options
-              );
-
-          expect(parser).toFailToParse("a", {
          offset:   0,
          line:     1,
-            column:   1,
-            expected: null,
-            found:    "a",
-            message:  "a"
-          });
-        });
-
-        it("generated failures overrides failures generated before", function() {
-          var parser = PEG.buildParser(
-                'start = "a" / ("b" { error("b"); })',
-                options
-              );
-
-          expect(parser).toFailToParse("b", {
-            message:  "b",
-            expected: null
+          column:   1
        });
      });

-        it("generated failures override failures generated after", function() {
+      it("can use the |error| function to trigger an error", function() {
        var parser = PEG.buildParser(
-                'start = ("a" { error("a"); }) / "b"',
+              'start = "a" { error("a"); }',
              options
            );

        expect(parser).toFailToParse("a", {
          message:  "a",
-            expected: null
-          });
-        });
-
-        it("the last invocation wins", function() {
-          var parser = PEG.buildParser(
-                'start = "a" { error("a1"); error("a2"); }',
-                options
-              );
-
-          expect(parser).toFailToParse("a", {
-            message:  "a2",
-            expected: null
-          });
+          expected: null,
+          found:    "a",
+          offset:   0,
+          line:     1,
+          column:   1
        });
      });

--- a/src/parser.pegjs
+++ b/src/parser.pegjs
@ -270,7 +270,6 @@ classCharacterRange
        error(
          "Invalid character range: " + begin.rawText + "-" + end.rawText + "."
        );
-        return;
      }

      return {