Parser: fix base64 decoding of multi-byte characters

fork
Chotiwat Chawannakul 10 years ago
parent bf6672d64e
commit 4d3da4ab0f

@ -744,7 +744,7 @@ function repeat(chr, len) {
return s;
}
function decodeBytes(buf, encoding, offset, mlen, pendoffset, state) {
function decodeBytes(buf, encoding, offset, mlen, pendoffset, state, nextBuf) {
if (!jsencoding)
jsencoding = require('../deps/encoding/encoding');
if (jsencoding.encodingExists(encoding)) {
@ -767,11 +767,39 @@ function decodeBytes(buf, encoding, offset, mlen, pendoffset, state) {
}
}
var ret, isPartial = false;
try {
ret = jsencoding.TextDecoder(encoding).decode(buf);
} catch (e) {
if (e.message.indexOf('Seeking') === 0)
isPartial = true;
if (state.remainder !== undefined) {
// use cached remainder from the previous lookahead
ret = state.remainder;
state.remainder = undefined;
} else {
try {
ret = jsencoding.TextDecoder(encoding).decode(buf);
} catch (e) {
if (e.message.indexOf('Seeking') === 0)
isPartial = true;
}
}
if (!isPartial && nextBuf) {
// try to decode a lookahead buffer (current buffer + next buffer)
// and see if it starts with the decoded value of the current buffer.
// if not, the current buffer is partial
var lookahead, lookaheadBuf = new Buffer(buf.length + nextBuf.length);
buf.copy(lookaheadBuf);
nextBuf.copy(lookaheadBuf, buf.length);
try {
lookahead = jsencoding.TextDecoder(encoding).decode(lookaheadBuf);
} catch(e) {
// cannot decode the lookahead, do nothing
}
if (lookahead !== undefined) {
if (lookahead.indexOf(ret) === 0) {
// the current buffer is whole, cache the lookahead's remainder
state.remainder = lookahead.substring(ret.length);
} else {
isPartial = true;
ret = undefined;
}
}
}
if (ret !== undefined) {
if (state.curReplace) {
@ -836,7 +864,7 @@ function decodeWords(str, state) {
var pendoffset = -1;
state.replaces = [];
var bytes, m, i, j, leni, lenj, seq, replaces = [], lastReplace = {};
var bytes, m, next, i, j, leni, lenj, seq, replaces = [], lastReplace = {};
// join consecutive q-encoded words that have the same charset first
while (m = RE_ENCWORD.exec(str)) {
@ -850,6 +878,7 @@ function decodeWords(str, state) {
index: m.index,
length: m[0].length,
pendoffset: pendoffset,
buf: undefined
};
lastReplace = replaces.length && replaces[replaces.length - 1];
if (seq.consecutive
@ -872,11 +901,20 @@ function decodeWords(str, state) {
if (m.encoding === 'q') {
// q-encoding, similar to quoted-printable
bytes = new Buffer(m.chunk.replace(RE_QENC, qEncReplacer), 'binary');
next = undefined;
} else {
// base64
bytes = new Buffer(m.chunk, 'base64');
bytes = m.buf || new Buffer(m.chunk, 'base64');
next = replaces[i + 1];
if (next && next.consecutive && next.encoding === m.encoding
&& next.charset === m.charset) {
// we use the next base64 chunk, if any, to determine the integrity
// of the current chunk
next.buf = new Buffer(next.chunk, 'base64');
}
}
decodeBytes(bytes, m.charset, m.index, m.length, m.pendoffset, state);
decodeBytes(bytes, m.charset, m.index, m.length, m.pendoffset, state,
next && next.buf);
}
// perform the actual replacements
@ -907,7 +945,8 @@ function parseHeader(str, noDecode) {
encoding: undefined,
consecutive: false,
replaces: undefined,
curReplace: undefined
curReplace: undefined,
remainder: undefined
},
m, h, i, val;

@ -62,7 +62,13 @@ var CRLF = '\r\n';
' =?utf-8?B?4Liy4LmB4Lib4Lil4LiBIOC5hiDguKPguK3=?=', CRLF,
' =?utf-8?Q?=E0=B8=9A=E0=B9=82=E0=B8=A5=E0=B8=81?=', CRLF],
expected: { subject: [ 'FW: สิ่งมีชีวิตหน้าตาแปลก ๆ รอบโลก' ] },
what: 'Folded header value (consecutive base64-encoded words)'
what: 'Folded header value (consecutive complete base64-encoded words)'
},
{ source: ['Subject: =?utf-8?B?4Lij4Li54Lib4Lig4Liy4Lie4LiX4Li14LmIIGVtYmVkIOC5g+C4meC5gOC4?=', CRLF,
' =?utf-8?B?meC4t+C5ieC4reC5gOC4oeC4peC4peC5jOC5hOC4oeC5iOC5geC4quC4lOC4?=', CRLF,
' =?utf-8?B?hw==?=', CRLF],
expected: { subject: [ 'รูปภาพที่ embed ในเนื้อเมลล์ไม่แสดง' ] },
what: 'Folded header value (consecutive partial base64-encoded words)'
},
// header with body
{ source: ['Subject: test subject', CRLF,

Loading…
Cancel
Save