Error handling: Implement the |expected| function

The |expected| function allows users to report regular match failures
inside actions.

If the |expected| function is called, and the reported match failure
turns out to be the cause of a parse error, the error message reported
by the parser will be in the usual "Expected ... but ... found." format
with the description specified in the |expected| call used as part of
the message.

Implements part of #198.

Speed impact
------------
Before:     1146.82 kB/s
After:      1031.25 kB/s
Difference: -10.08%

Size impact
-----------
Before:     950817 b
After:      973269 b
Difference: 2.36%

(Measured by /tools/impact with Node.js v0.6.18 on x86_64 GNU/Linux.)
redux
David Majda 11 years ago
parent 1b2279e026
commit af701dcf80

@ -9,6 +9,7 @@ MODULES = utils \
grammar-error \ grammar-error \
parser \ parser \
compiler/opcodes \ compiler/opcodes \
compiler/flags \
compiler/passes/generate-bytecode \ compiler/passes/generate-bytecode \
compiler/passes/generate-javascript \ compiler/passes/generate-javascript \
compiler/passes/remove-proxy-rules \ compiler/passes/remove-proxy-rules \

@ -376,6 +376,11 @@ expression as its arguments. The action should return some JavaScript value
using the `return` statement. This value is considered match result of the using the `return` statement. This value is considered match result of the
preceding expression. preceding expression.
To indicate a match failure, the code inside the action can invoke the
`expected` function. It takes one parameter — a description of what was expected
at the current position. This description will be used as part of the message of
the exception thrown if the match failure leads to a parse error.
The code inside the action can access all variables and functions defined in the The code inside the action can access all variables and functions defined in the
initializer at the beginning of the grammar. Curly braces in the action code initializer at the beginning of the grammar. Curly braces in the action code
must be balanced. must be balanced.

@ -0,0 +1,6 @@
/* Bytecode instruction flags. */
module.exports = {
DONT_CHECK_FAILS: 0,
CHECK_FAILS: 1
};

@ -1,5 +1,6 @@
var utils = require("../../utils"), var utils = require("../../utils"),
op = require("../opcodes"); op = require("../opcodes"),
flags = require("../flags");
/* Generates bytecode. /* Generates bytecode.
* *
@ -151,9 +152,9 @@ var utils = require("../../utils"),
* *
* reportedPos = currPos; * reportedPos = currPos;
* *
* [23] CALL f, n, pc, p1, p2, ..., pN * [23] CALL f, c, n, pc, p1, p2, ..., pN
* *
* value = consts[f](stack[p1], ..., stack[pN]); * value = call(consts[f], c, stack[p1], ..., stack[pN]);
* stack.pop(n); * stack.pop(n);
* stack.push(value); * stack.push(value);
* *
@ -206,10 +207,16 @@ module.exports = function(ast, options) {
return condCode.concat([bodyCode.length], bodyCode); return condCode.concat([bodyCode.length], bodyCode);
} }
function buildCall(functionIndex, delta, env, sp) { function buildCall(functionIndex, checkFails, delta, env, sp) {
var params = utils.map( utils.values(env), function(p) { return sp - p; }); var params = utils.map( utils.values(env), function(p) { return sp - p; });
return [op.CALL, functionIndex, delta, params.length].concat(params); return [
op.CALL,
functionIndex,
checkFails,
delta,
params.length
].concat(params);
} }
function buildSimplePredicate(expression, negative, context) { function buildSimplePredicate(expression, negative, context) {
@ -248,7 +255,13 @@ module.exports = function(ast, options) {
return buildSequence( return buildSequence(
[op.REPORT_CURR_POS], [op.REPORT_CURR_POS],
buildCall(functionIndex, 0, context.env, context.sp), buildCall(
functionIndex,
flags.DONT_CHECK_FAILS,
0,
context.env,
context.sp
),
buildCondition( buildCondition(
[op.IF], [op.IF],
buildSequence( buildSequence(
@ -347,7 +360,13 @@ module.exports = function(ast, options) {
[op.IF_NOT_ERROR], [op.IF_NOT_ERROR],
buildSequence( buildSequence(
[op.REPORT_SAVED_POS, 1], [op.REPORT_SAVED_POS, 1],
buildCall(functionIndex, 1, env, context.sp + 2) buildCall(
functionIndex,
flags.CHECK_FAILS,
1,
env,
context.sp + 2
)
), ),
[] []
), ),
@ -396,6 +415,7 @@ module.exports = function(ast, options) {
[op.REPORT_SAVED_POS, node.elements.length], [op.REPORT_SAVED_POS, node.elements.length],
buildCall( buildCall(
functionIndex, functionIndex,
flags.CHECK_FAILS,
node.elements.length, node.elements.length,
context.env, context.env,
context.sp context.sp

@ -1,5 +1,6 @@
var utils = require("../../utils"), var utils = require("../../utils"),
op = require("../opcodes"); op = require("../opcodes"),
flags = require("../flags");
/* Generates parser JavaScript code. */ /* Generates parser JavaScript code. */
module.exports = function(ast, options) { module.exports = function(ast, options) {
@ -102,7 +103,7 @@ module.exports = function(ast, options) {
} }
function generateCall() { function generateCall() {
var baseLength = 4, var baseLength = 5,
paramsLengthCode = 'bc[ip + ' + (baseLength - 1) + ']'; paramsLengthCode = 'bc[ip + ' + (baseLength - 1) + ']';
return [ return [
@ -111,11 +112,15 @@ module.exports = function(ast, options) {
' params[i] = stack[stack.length - 1 - params[i]];', ' params[i] = stack[stack.length - 1 - params[i]];',
'}', '}',
'', '',
'stack.splice(', 'if (bc[ip + 2] === ' + flags.CHECK_FAILS + ') {',
' stack.length - bc[ip + 2],', ' peg$userFail = false;',
' bc[ip + 2],', '}',
' peg$consts[bc[ip + 1]].apply(null, params)', 'result = peg$consts[bc[ip + 1]].apply(null, params);',
');', 'if (bc[ip + 2] === ' + flags.CHECK_FAILS + ') {',
' if (peg$userFail) { result = peg$FAILED; }',
'}',
'',
'stack.splice(stack.length - bc[ip + 3], bc[ip + 3], result);',
'', '',
'ip += ' + baseLength + ' + ' + paramsLengthCode + ';', 'ip += ' + baseLength + ' + ' + paramsLengthCode + ';',
'break;' 'break;'
@ -140,7 +145,7 @@ module.exports = function(ast, options) {
' end = bc.length,', ' end = bc.length,',
' ends = [],', ' ends = [],',
' stack = [],', ' stack = [],',
' params, i;', ' params, result, i;',
'' ''
].join('\n')); ].join('\n'));
@ -272,7 +277,7 @@ module.exports = function(ast, options) {
' case ' + op.FAIL + ':', // FAIL e ' case ' + op.FAIL + ':', // FAIL e
' stack.push(peg$FAILED);', ' stack.push(peg$FAILED);',
' if (peg$silentFails === 0) {', ' if (peg$silentFails === 0) {',
' peg$fail(peg$consts[bc[ip + 1]]);', ' peg$fail(peg$consts[bc[ip + 1]], peg$currPos);',
' }', ' }',
' ip += 2;', ' ip += 2;',
' break;', ' break;',
@ -419,7 +424,7 @@ module.exports = function(ast, options) {
} }
function compileCall(cond) { function compileCall(cond) {
var baseLength = 4, var baseLength = 5,
paramsLength = bc[ip + baseLength - 1]; paramsLength = bc[ip + baseLength - 1];
var value = c(bc[ip + 1]) + '(' var value = c(bc[ip + 1]) + '('
@ -428,8 +433,14 @@ module.exports = function(ast, options) {
stackIndex stackIndex
).join(', ') ).join(', ')
+ ')'; + ')';
stack.pop(bc[ip + 2]); stack.pop(bc[ip + 3]);
if (bc[ip + 2] === flags.CHECK_FAILS) {
parts.push('peg$userFail = false;');
}
parts.push(stack.push(value)); parts.push(stack.push(value));
if (bc[ip + 2] === flags.CHECK_FAILS) {
parts.push('if (peg$userFail) { ' + stack.top() + ' = peg$FAILED; }');
}
ip += baseLength + paramsLength; ip += baseLength + paramsLength;
} }
@ -586,7 +597,7 @@ module.exports = function(ast, options) {
case op.FAIL: // FAIL e case op.FAIL: // FAIL e
parts.push(stack.push('peg$FAILED')); parts.push(stack.push('peg$FAILED'));
parts.push('if (peg$silentFails === 0) { peg$fail(' + c(bc[ip + 1]) + '); }'); parts.push('if (peg$silentFails === 0) { peg$fail(' + c(bc[ip + 1]) + ', peg$currPos); }');
ip += 2; ip += 2;
break; break;
@ -791,6 +802,7 @@ module.exports = function(ast, options) {
' peg$maxFailPos = 0,', ' peg$maxFailPos = 0,',
' peg$maxFailExpected = [],', ' peg$maxFailExpected = [],',
' peg$silentFails = 0,', // 0 = report failures, > 0 = silence failures ' peg$silentFails = 0,', // 0 = report failures, > 0 = silence failures
' peg$userFail = false,',
'' ''
].join('\n')); ].join('\n'));
@ -843,6 +855,13 @@ module.exports = function(ast, options) {
' return peg$computePosDetails(peg$reportedPos).column;', ' return peg$computePosDetails(peg$reportedPos).column;',
' }', ' }',
'', '',
' function expected(description) {',
' if (peg$silentFails === 0) {',
' peg$fail({ type: "other", description: description }, peg$reportedPos);',
' }',
' peg$userFail = true;',
' }',
'',
' function peg$computePosDetails(pos) {', ' function peg$computePosDetails(pos) {',
' function advance(details, startPos, endPos) {', ' function advance(details, startPos, endPos) {',
' var p, ch;', ' var p, ch;',
@ -876,11 +895,11 @@ module.exports = function(ast, options) {
' return peg$cachedPosDetails;', ' return peg$cachedPosDetails;',
' }', ' }',
'', '',
' function peg$fail(expected) {', ' function peg$fail(expected, pos) {',
' if (peg$currPos < peg$maxFailPos) { return; }', ' if (pos < peg$maxFailPos) { return; }',
'', '',
' if (peg$currPos > peg$maxFailPos) {', ' if (pos > peg$maxFailPos) {',
' peg$maxFailPos = peg$currPos;', ' peg$maxFailPos = pos;',
' peg$maxFailExpected = [];', ' peg$maxFailExpected = [];',
' }', ' }',
'', '',
@ -888,7 +907,7 @@ module.exports = function(ast, options) {
' }', ' }',
'', '',
' function peg$cleanupExpected(expected) {', ' function peg$cleanupExpected(expected) {',
' var i = 0;', ' var i = 1;',
'', '',
' expected.sort(function(a, b) {', ' expected.sort(function(a, b) {',
' if (a.description < b.description) {', ' if (a.description < b.description) {',
@ -900,13 +919,8 @@ module.exports = function(ast, options) {
' }', ' }',
' });', ' });',
'', '',
/*
* This works because the bytecode generator guarantees that every
* expectation object exists only once, so it's enough to use |===| instead
* of deeper structural comparison.
*/
' while (i < expected.length) {', ' while (i < expected.length) {',
' if (expected[i - 1] === expected[i]) {', ' if (expected[i - 1].description === expected[i].description) {',
' expected.splice(i, 1);', ' expected.splice(i, 1);',
' } else {', ' } else {',
' i++;', ' i++;',

File diff suppressed because it is too large Load Diff

@ -20,6 +20,7 @@
"examples/json.pegjs", "examples/json.pegjs",
"lib/compiler.js", "lib/compiler.js",
"lib/compiler/opcodes.js", "lib/compiler/opcodes.js",
"lib/compiler/flags.js",
"lib/compiler/passes/generate-bytecode.js", "lib/compiler/passes/generate-bytecode.js",
"lib/compiler/passes/generate-javascript.js", "lib/compiler/passes/generate-javascript.js",
"lib/compiler/passes/remove-proxy-rules.js", "lib/compiler/passes/remove-proxy-rules.js",

@ -89,14 +89,14 @@ describe("compiler pass |generateBytecode|", function() {
it("generates correct bytecode", function() { it("generates correct bytecode", function() {
expect(pass).toChangeAST(grammar, bytecodeDetails([ expect(pass).toChangeAST(grammar, bytecodeDetails([
1, // PUSH_CURR_POS 1, // PUSH_CURR_POS
0, 0, // PUSH 0, 0, // PUSH
12, 6, 0, // IF_NOT_ERROR 12, 7, 0, // IF_NOT_ERROR
21, 1, // * REPORT_SAVED_POS 21, 1, // * REPORT_SAVED_POS
23, 1, 1, 0, // CALL 23, 1, 1, 1, 0, // CALL
11, 1, 1, // IF_ERROR 11, 1, 1, // IF_ERROR
6, // * NIP_CURR_POS 6, // * NIP_CURR_POS
5 // * NIP 5 // * NIP
])); ]));
}); });
@ -115,9 +115,9 @@ describe("compiler pass |generateBytecode|", function() {
expect(pass).toChangeAST(grammar, bytecodeDetails([ expect(pass).toChangeAST(grammar, bytecodeDetails([
1, // PUSH_CURR_POS 1, // PUSH_CURR_POS
15, 0, 2, 2, 19, 0, 20, 1, // <expression> 15, 0, 2, 2, 19, 0, 20, 1, // <expression>
12, 7, 0, // IF_NOT_ERROR 12, 8, 0, // IF_NOT_ERROR
21, 1, // * REPORT_SAVED_POS 21, 1, // * REPORT_SAVED_POS
23, 2, 1, 1, 0, // CALL 23, 2, 1, 1, 1, 0, // CALL
11, 1, 1, // IF_ERROR 11, 1, 1, // IF_ERROR
6, // * NIP_CURR_POS 6, // * NIP_CURR_POS
5 // * NIP 5 // * NIP
@ -139,13 +139,13 @@ describe("compiler pass |generateBytecode|", function() {
expect(pass).toChangeAST(grammar, bytecodeDetails([ expect(pass).toChangeAST(grammar, bytecodeDetails([
1, // PUSH_CURR_POS 1, // PUSH_CURR_POS
15, 1, 2, 2, 19, 1, 20, 2, // <elements[0]> 15, 1, 2, 2, 19, 1, 20, 2, // <elements[0]>
12, 46, 4, // IF_NOT_ERROR 12, 47, 4, // IF_NOT_ERROR
15, 3, 2, 2, 19, 3, 20, 4, // * <elements[1]> 15, 3, 2, 2, 19, 3, 20, 4, // * <elements[1]>
12, 30, 5, // IF_NOT_ERROR 12, 31, 5, // IF_NOT_ERROR
15, 5, 2, 2, 19, 5, 20, 6, // * <elements[2]> 15, 5, 2, 2, 19, 5, 20, 6, // * <elements[2]>
12, 14, 5, // IF_NOT_ERROR 12, 15, 5, // IF_NOT_ERROR
21, 3, // * REPORT_SAVED_POS 21, 3, // * REPORT_SAVED_POS
23, 7, 3, 3, 2, 1, 0, // CALL 23, 7, 1, 3, 3, 2, 1, 0, // CALL
11, 1, 1, // IF_ERROR 11, 1, 1, // IF_ERROR
6, // * NIP_CURR_POS 6, // * NIP_CURR_POS
5, // * NIP 5, // * NIP
@ -315,13 +315,13 @@ describe("compiler pass |generateBytecode|", function() {
it("generates correct bytecode", function() { it("generates correct bytecode", function() {
expect(pass).toChangeAST(grammar, bytecodeDetails([ expect(pass).toChangeAST(grammar, bytecodeDetails([
22, // REPORT_CURR_POS 22, // REPORT_CURR_POS
23, 0, 0, 0, // CALL 23, 0, 0, 0, 0, // CALL
10, 3, 3, // IF 10, 3, 3, // IF
2, // * POP 2, // * POP
0, 1, // PUSH 0, 1, // PUSH
2, // * POP 2, // * POP
0, 2 // PUSH 0, 2 // PUSH
])); ]));
}); });
@ -340,13 +340,13 @@ describe("compiler pass |generateBytecode|", function() {
expect(pass).toChangeAST(grammar, bytecodeDetails([ expect(pass).toChangeAST(grammar, bytecodeDetails([
1, // PUSH_CURR_POS 1, // PUSH_CURR_POS
15, 1, 2, 2, 19, 1, 20, 2, // <elements[0]> 15, 1, 2, 2, 19, 1, 20, 2, // <elements[0]>
12, 60, 4, // IF_NOT_ERROR 12, 61, 4, // IF_NOT_ERROR
15, 3, 2, 2, 19, 3, 20, 4, // * <elements[1]> 15, 3, 2, 2, 19, 3, 20, 4, // * <elements[1]>
12, 44, 5, // IF_NOT_ERROR 12, 45, 5, // IF_NOT_ERROR
15, 5, 2, 2, 19, 5, 20, 6, // * <elements[2]> 15, 5, 2, 2, 19, 5, 20, 6, // * <elements[2]>
12, 28, 5, // IF_NOT_ERROR 12, 29, 5, // IF_NOT_ERROR
22, // * REPORT_CURR_POS 22, // * REPORT_CURR_POS
23, 7, 0, 3, 2, 1, 0, // CALL 23, 7, 0, 0, 3, 2, 1, 0, // CALL
10, 3, 3, // IF 10, 3, 3, // IF
2, // * POP 2, // * POP
0, 8, // PUSH 0, 8, // PUSH
@ -392,13 +392,13 @@ describe("compiler pass |generateBytecode|", function() {
it("generates correct bytecode", function() { it("generates correct bytecode", function() {
expect(pass).toChangeAST(grammar, bytecodeDetails([ expect(pass).toChangeAST(grammar, bytecodeDetails([
22, // REPORT_CURR_POS 22, // REPORT_CURR_POS
23, 0, 0, 0, // CALL_PREDICATE 23, 0, 0, 0, 0, // CALL_PREDICATE
10, 3, 3, // IF 10, 3, 3, // IF
2, // * POP 2, // * POP
0, 2, // PUSH 0, 2, // PUSH
2, // * POP 2, // * POP
0, 1 // PUSH 0, 1 // PUSH
])); ]));
}); });
@ -417,13 +417,13 @@ describe("compiler pass |generateBytecode|", function() {
expect(pass).toChangeAST(grammar, bytecodeDetails([ expect(pass).toChangeAST(grammar, bytecodeDetails([
1, // PUSH_CURR_POS 1, // PUSH_CURR_POS
15, 1, 2, 2, 19, 1, 20, 2, // <elements[0]> 15, 1, 2, 2, 19, 1, 20, 2, // <elements[0]>
12, 60, 4, // IF_NOT_ERROR 12, 61, 4, // IF_NOT_ERROR
15, 3, 2, 2, 19, 3, 20, 4, // * <elements[1]> 15, 3, 2, 2, 19, 3, 20, 4, // * <elements[1]>
12, 44, 5, // IF_NOT_ERROR 12, 45, 5, // IF_NOT_ERROR
15, 5, 2, 2, 19, 5, 20, 6, // * <elements[2]> 15, 5, 2, 2, 19, 5, 20, 6, // * <elements[2]>
12, 28, 5, // IF_NOT_ERROR 12, 29, 5, // IF_NOT_ERROR
22, // * REPORT_CURR_POS 22, // * REPORT_CURR_POS
23, 7, 0, 3, 2, 1, 0, // CALL 23, 7, 0, 0, 3, 2, 1, 0, // CALL
10, 3, 3, // IF 10, 3, 3, // IF
2, // * POP 2, // * POP
0, 0, // PUSH 0, 0, // PUSH

@ -337,6 +337,66 @@ describe("generated parser", function() {
expect(parser).toParse("a", 42); expect(parser).toParse("a", 42);
}); });
describe("|expected| function", function() {
it("generates a regular match failure", function() {
var parser = PEG.buildParser(
'start = "a" { expected("a"); }',
options
);
expect(parser).toFailToParse("a", {
offset: 0,
line: 1,
column: 1,
expected: [{ type: "other", description: "a" }],
found: "a",
message: 'Expected a but "a" found.'
});
});
it("generated failures combine with failures generated before", function() {
var parser = PEG.buildParser(
'start = "a" / ("b" { expected("b"); })',
options
);
expect(parser).toFailToParse("b", {
expected: [
{ type: "literal", value: "a", description: '"a"' },
{ type: "other", description: "b" }
]
});
});
it("generated failures combine with failures generated after", function() {
var parser = PEG.buildParser(
'start = ("a" { expected("a"); }) / "b"',
options
);
expect(parser).toFailToParse("a", {
expected: [
{ type: "literal", value: "b", description: '"b"' },
{ type: "other", description: "a" }
]
});
});
it("multiple invocations generate additional failures", function() {
var parser = PEG.buildParser(
'start = "a" { expected("a1"); expected("a2"); }',
options
);
expect(parser).toFailToParse("a", {
expected: [
{ type: "other", description: "a1" },
{ type: "other", description: "a2" }
]
});
});
});
it("can use functions defined in the initializer", function() { it("can use functions defined in the initializer", function() {
var parser = PEG.buildParser([ var parser = PEG.buildParser([
'{ function f() { return 42; } }', '{ function f() { return 42; } }',

Loading…
Cancel
Save