From b630ed05cef681c164d35934ab23782c64bfe03d Mon Sep 17 00:00:00 2001 From: Chotiwat Chawannakul Date: Thu, 13 Mar 2014 20:02:27 +0700 Subject: [PATCH] Parser: fix premature decoding of encoded words ending with an incomplete multi-byte character --- lib/Parser.js | 45 ++++++++++++++++++++++++++++++--------- test/test-parse-header.js | 5 +++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/lib/Parser.js b/lib/Parser.js index 8a5e28f..a98cf58 100644 --- a/lib/Parser.js +++ b/lib/Parser.js @@ -836,21 +836,46 @@ function decodeWords(str, state) { var pendoffset = -1; state.replaces = []; - var bytes, m, i, j, lenj, seq; - // generate replacement substrings and their positions + var bytes, m, i, j, leni, lenj, seq, replaces = [], lastReplace = {}; + + // join consecutive encoded words that have the same charset and encoding first while (m = RE_ENCWORD.exec(str)) { - state.consecutive = (pendoffset > -1 - ? RE_LWS_ONLY.test(str.substring(pendoffset, m.index)) - : false); - if (m[2].toLowerCase() === 'q') { + seq = { + consecutive: (pendoffset > -1 + ? RE_LWS_ONLY.test(str.substring(pendoffset, m.index)) + : false), + charset: m[1].toLowerCase(), + encoding: m[2].toLowerCase(), + chunk: m[3], + index: m.index, + length: m[0].length, + pendoffset: pendoffset, + }; + lastReplace = replaces.length && replaces[replaces.length - 1]; + if (seq.consecutive + && seq.charset === lastReplace.charset + && seq.encoding === lastReplace.encoding) { + lastReplace.length += seq.length + seq.index - pendoffset; + lastReplace.chunk += seq.chunk; + } else { + replaces.push(seq); + lastReplace = seq; + } + pendoffset = m.index + m[0].length; + } + + // generate replacement substrings and their positions + for (i = 0, leni = replaces.length; i < leni; ++i) { + m = replaces[i]; + state.consecutive = m.consecutive; + if (m.encoding === 'q') { // q-encoding, similar to quoted-printable - bytes = new Buffer(m[3].replace(RE_QENC, qEncReplacer), 'binary'); + bytes = new Buffer(m.chunk.replace(RE_QENC, qEncReplacer), 'binary'); } else { // base64 - bytes = new Buffer(m[3], 'base64'); + bytes = new Buffer(m.chunk, 'base64'); } - decodeBytes(bytes, m[1].toLowerCase(), m.index, m[0].length, pendoffset, state); - pendoffset = m.index + m[0].length; + decodeBytes(bytes, m.charset, m.index, m.length, m.pendoffset, state); } // perform the actual replacements diff --git a/test/test-parse-header.js b/test/test-parse-header.js index 3c47db9..71df863 100644 --- a/test/test-parse-header.js +++ b/test/test-parse-header.js @@ -52,6 +52,11 @@ var CRLF = '\r\n'; expected: { subject: [ 'ไทย ไทย ไทย' ] }, what: 'Folded header value (adjacent MIME encoded-words seperated by linear whitespace)' }, + { source: ['Subject: =?utf-8?Q?abcdefghij_=E0=B9=83=E0=B8=99_klmnopqr_=E0=B9=84=E0=B8=A1=E0=B9?=', CRLF, + ' =?utf-8?Q?=88=E0=B8=82=E0=B8=B6=E0=B9=89=E0=B8=99?=', CRLF], + expected: { subject: [ 'abcdefghij ใน klmnopqr ไม่ขึ้น' ] }, + what: 'Folded header value (incomplete multi-byte character split)' + }, // header with body { source: ['Subject: test subject', CRLF, 'X-Another-Header: test', CRLF,