Parser: fix header parsing so that folded whitespace between adjacent MIME encoded-words is ignored

fork
mscdex 11 years ago
parent 0662d18f50
commit 4729de2dab

@ -21,6 +21,8 @@ var CH_LF = 10,
RE_CRLF = /\r\n/g,
RE_HDR = /^([^:]+):[ \t]?(.+)?$/,
RE_ENCWORD = /=\?([^?]*?)\?([qb])\?(.*?)\?=/gi,
RE_ENCWORD_END = /=\?([^?]*?)\?([qb])\?(.*?)\?=$/i,
RE_ENCWORD_BEGIN = /^[ \t]=\?([^?]*?)\?([qb])\?(.*?)\?=/i,
RE_QENC = /(?:=([a-fA-F0-9]{2}))|_/g,
RE_SEARCH_MODSEQ = /^(.+) \(MODSEQ (.+?)\)$/i;
@ -713,22 +715,28 @@ function parseHeader(str, noDecode) {
var lines = str.split(RE_CRLF),
len = lines.length,
header = {},
m, h;
m, h, val;
for (var i = 0; i < len; ++i) {
if (lines[i].length === 0)
continue;
if (lines[i][0] === '\t' || lines[i][0] === ' ') {
if (!noDecode)
lines[i] = decodeWords(lines[i]);
// folded header content
// RFC2822 says to just remove the CRLF and not the whitespace following
// it, so we follow the RFC and include the leading whitespace ...
header[h][header[h].length - 1] += lines[i];
val = lines[i];
if (!noDecode) {
if (RE_ENCWORD_END.test(lines[i - 1])
&& RE_ENCWORD_BEGIN.test(val)) {
// RFC2047 says to *ignore* leading whitespace in folded header values
// for adjacent encoded-words ...
val = val.substring(1);
}
val = decodeWords(val);
}
header[h][header[h].length - 1] += val;
} else {
m = RE_HDR.exec(lines[i]);
if (m) {
h = m[1].toLowerCase();
h = m[1].toLowerCase().trim();
if (m[2]) {
if (!noDecode)
m[2] = decodeWords(m[2]);

@ -0,0 +1,63 @@
var parseHeader = require('../lib/Parser').parseHeader;
var assert = require('assert'),
inspect = require('util').inspect;
// Line terminator used between the header lines of every test source below.
var CRLF = '\r\n';

// Table-driven test cases for parseHeader().
//   source   - string fragments that are joined into one raw header block
//   expected - object parseHeader() must return: lower-cased header name
//              mapped to an array of (decoded) values
//   what     - human-readable case label used in failure messages
[
  // Plain folding: the CRLF is dropped, the leading whitespace is kept.
  { source: ['To: Foo', CRLF,
             ' Bar Baz', CRLF],
    expected: { to: [ 'Foo Bar Baz' ] },
    what: 'Folded header value (plain -- space)'
  },
  { source: ['To: Foo', CRLF,
             '\tBar\tBaz', CRLF],
    expected: { to: [ 'Foo\tBar\tBaz' ] },
    what: 'Folded header value (plain -- tab)'
  },
  // A single encoded-word on an unfolded line.
  { source: ['Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=', CRLF],
    expected: { subject: [ '¡Hola, señor!' ] },
    what: 'MIME encoded-word in value'
  },
  // Encoded-words separated only by folding whitespace: the whitespace must
  // be ignored, so the decoded pieces are concatenated directly.
  { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF,
             ' =?GB2312?Q?345678901234567890123456789012345678901234567890123456789012?=', CRLF,
             ' =?GB2312?Q?345678901234567890?=', CRLF],
    expected: { subject: [ '测试题目与中国信 long subjects are not OK 12345678901234567890123456789012345678901234567890123456789012345678901234567890' ] },
    what: 'Folded header value (adjacent MIME encoded-words)'
  },
  // A literal '3' sits between the fold and each encoded-word, so the words
  // are not adjacent and the folding whitespace must be preserved.
  { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF,
             ' 3=?GB2312?Q?45678901234567890123456789012345678901234567890123456789012?=', CRLF,
             ' 3=?GB2312?Q?45678901234567890?=', CRLF],
    expected: { subject: [ '测试题目与中国信 long subjects are not OK 12 345678901234567890123456789012345678901234567890123456789012 345678901234567890' ] },
    what: 'Folded header value (non-adjacent MIME encoded-words)'
  },
  // Mixed: the first fold is non-adjacent (space kept), the second fold is
  // adjacent (space dropped).
  { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF,
             ' 3=?GB2312?Q?45678901234567890123456789012345678901234567890123456789012?=', CRLF,
             ' =?GB2312?Q?345678901234567890?=', CRLF],
    expected: { subject: [ '测试题目与中国信 long subjects are not OK 12 345678901234567890123456789012345678901234567890123456789012345678901234567890' ] },
    what: 'Folded header value (one adjacent, one non-adjacent MIME encoded-words)'
  },
].forEach(function(v) {
  var result;

  try {
    result = parseHeader(v.source.join(''));
  } catch (e) {
    // An exception thrown by the parser is a test failure. Previously it was
    // only logged and the case was skipped, so the run still exited cleanly.
    // Fail through assert, exactly like the result-mismatch path below.
    assert(false, makeMsg(v.what, 'JS Exception: ' + e.stack));
  }

  assert.deepEqual(result,
                   v.expected,
                   makeMsg(v.what,
                           'Result mismatch:'
                           + '\nParsed: ' + inspect(result, false, 10)
                           + '\nExpected: ' + inspect(v.expected, false, 10)
                   )
  );
});
/**
 * Builds a failure message tagged with the name of the test case.
 *
 * @param {string} what - label of the test case, placed in square brackets
 * @param {string} msg - description of what went wrong
 * @returns {string} the text "[<what>]: <msg>"
 */
function makeMsg(what, msg) {
  var label = '[' + what + ']';
  return label + ': ' + msg;
}
Loading…
Cancel
Save