Improve expression descriptions in error messages

Before this commit, descriptions of literals used in error messages were
built by applying JavaScript string escaping to their values, making the
descriptions look like JavaScript strings. Descriptions of character
classes were built using their raw text. These approaches were mutually
inconsistent and led to descriptions which were over-escaped and not
necessarily human-friendly (in the case of literals) or coupled with details
of the grammar (in the case of character classes).

This commit changes description building code in both cases and unifies
it. The intent is to generate human-friendly descriptions of matched
expressions which are clean, unambiguous, and which don't escape too
many characters, while handling special characters such as newlines
well.

Fixes #127.
redux
David Majda 8 years ago
parent 2fd77b96fc
commit 4fe682794d

@ -280,6 +280,36 @@ function generateBytecode(ast) {
);
}
/*
 * Returns the code of the first character of |ch| as an uppercase hexadecimal
 * string. The result is not zero-padded (e.g. "\n" gives "A", "\x1F" gives
 * "1F"), so callers add any padding they need.
 */
function hex(ch) {
  return ch.charCodeAt(0).toString(16).toUpperCase();
}

/*
 * Escapes the control characters that are handled identically in literal and
 * class descriptions: NUL, tab, line feed and carriage return get their short
 * escapes; the remaining characters in \x01-\x1F and \x7F-\x9F get a two-digit
 * "\xHH" escape.
 *
 * Backslashes must already be escaped by the caller. This helper introduces
 * new backslashes, so escaping them afterwards would double them.
 */
function controlCharsDescriptionEscape(s) {
  return s
    .replace(/\0/g, '\\0')   // null
    .replace(/\t/g, '\\t')   // horizontal tab
    .replace(/\n/g, '\\n')   // line feed
    .replace(/\r/g, '\\r')   // carriage return
    // Codes below 0x10 yield a single hex digit, so pad with a literal "0".
    // \0, \t, \n and \r are excluded because they were rewritten above.
    .replace(/[\x01-\x08\x0B\x0C\x0E\x0F]/g, function(ch) { return '\\x0' + hex(ch); })
    .replace(/[\x10-\x1F\x7F-\x9F]/g, function(ch) { return '\\x' + hex(ch); });
}

/*
 * Escapes |s| for use inside a double-quoted literal description: backslashes
 * and double quotes are backslash-escaped, then control characters are
 * escaped. All other characters are passed through unchanged.
 */
function literalDescriptionEscape(s) {
  var escaped = s
    .replace(/\\/g, '\\\\')  // backslash (must come first)
    .replace(/"/g, '\\"');   // closing double quote

  return controlCharsDescriptionEscape(escaped);
}

/*
 * Escapes |s| for use inside a bracketed character class description:
 * backslashes and the characters "]", "^" and "-" are backslash-escaped,
 * then control characters are escaped. All other characters are passed
 * through unchanged.
 */
function classDescriptionEscape(s) {
  var escaped = s
    .replace(/\\/g, '\\\\')  // backslash (must come first)
    .replace(/\]/g, '\\]')   // closing bracket
    .replace(/\^/g, '\\^')   // caret
    .replace(/-/g, '\\-');   // dash

  return controlCharsDescriptionEscape(escaped);
}
var generate = visitor.build({
grammar: function(node) {
arrays.each(node.rules, generate);
@ -545,7 +575,7 @@ function generateBytecode(ast) {
'type: "literal",',
'value: "' + js.stringEscape(node.value) + '",',
'description: "'
+ js.stringEscape('"' + js.stringEscape(node.value) + '"')
+ js.stringEscape('"' + literalDescriptionEscape(node.value) + '"')
+ '"',
'}'
].join(' '));
@ -572,7 +602,7 @@ function generateBytecode(ast) {
},
"class": function(node) {
var regexp, regexpIndex, expectedIndex;
var regexp, description, regexpIndex, expectedIndex;
if (node.parts.length > 0) {
regexp = '/^['
@ -593,12 +623,23 @@ function generateBytecode(ast) {
regexp = node.inverted ? '/^[\\S\\s]/' : '/^(?!)/';
}
description = "["
+ (node.inverted ? "^" : "")
+ arrays.map(node.parts, function(part) {
return part instanceof Array
? classDescriptionEscape(part[0])
+ "-"
+ classDescriptionEscape(part[1])
: classDescriptionEscape(part);
}).join("")
+ "]" + (node.ignoreCase ? "i" : "");
regexpIndex = addConst(regexp);
expectedIndex = addConst([
'{',
'type: "class",',
'value: "' + js.stringEscape(node.rawText) + '",',
'description: "' + js.stringEscape(node.rawText) + '"',
'description: "' + js.stringEscape(description) + '"',
'}'
].join(' '));

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save