Parser: fix premature decoding of encoded words ending with an incomplete multi-byte character

fork
Chotiwat Chawannakul 11 years ago
parent 397836e94c
commit b630ed05ce

@ -836,21 +836,46 @@ function decodeWords(str, state) {
var pendoffset = -1;
state.replaces = [];
var bytes, m, i, j, lenj, seq;
// generate replacement substrings and their positions
var bytes, m, i, j, leni, lenj, seq, replaces = [], lastReplace = {};
// join consecutive encoded words that have the same charset and encoding first
while (m = RE_ENCWORD.exec(str)) {
state.consecutive = (pendoffset > -1
? RE_LWS_ONLY.test(str.substring(pendoffset, m.index))
: false);
if (m[2].toLowerCase() === 'q') {
seq = {
consecutive: (pendoffset > -1
? RE_LWS_ONLY.test(str.substring(pendoffset, m.index))
: false),
charset: m[1].toLowerCase(),
encoding: m[2].toLowerCase(),
chunk: m[3],
index: m.index,
length: m[0].length,
pendoffset: pendoffset,
};
lastReplace = replaces.length && replaces[replaces.length - 1];
if (seq.consecutive
&& seq.charset === lastReplace.charset
&& seq.encoding === lastReplace.encoding) {
lastReplace.length += seq.length + seq.index - pendoffset;
lastReplace.chunk += seq.chunk;
} else {
replaces.push(seq);
lastReplace = seq;
}
pendoffset = m.index + m[0].length;
}
// generate replacement substrings and their positions
for (i = 0, leni = replaces.length; i < leni; ++i) {
m = replaces[i];
state.consecutive = m.consecutive;
if (m.encoding === 'q') {
// q-encoding, similar to quoted-printable
bytes = new Buffer(m[3].replace(RE_QENC, qEncReplacer), 'binary');
bytes = new Buffer(m.chunk.replace(RE_QENC, qEncReplacer), 'binary');
} else {
// base64
bytes = new Buffer(m[3], 'base64');
bytes = new Buffer(m.chunk, 'base64');
}
decodeBytes(bytes, m[1].toLowerCase(), m.index, m[0].length, pendoffset, state);
pendoffset = m.index + m[0].length;
decodeBytes(bytes, m.charset, m.index, m.length, m.pendoffset, state);
}
// perform the actual replacements

@ -52,6 +52,11 @@ var CRLF = '\r\n';
expected: { subject: [ 'ไทย ไทย ไทย' ] },
what: 'Folded header value (adjacent MIME encoded-words seperated by linear whitespace)'
},
{ source: ['Subject: =?utf-8?Q?abcdefghij_=E0=B9=83=E0=B8=99_klmnopqr_=E0=B9=84=E0=B8=A1=E0=B9?=', CRLF,
' =?utf-8?Q?=88=E0=B8=82=E0=B8=B6=E0=B9=89=E0=B8=99?=', CRLF],
expected: { subject: [ 'abcdefghij ใน klmnopqr ไม่ขึ้น' ] },
what: 'Folded header value (incomplete multi-byte character split)'
},
// header with body
{ source: ['Subject: test subject', CRLF,
'X-Another-Header: test', CRLF,

Loading…
Cancel
Save