diff --git a/lib/Parser.js b/lib/Parser.js index a1a370c..8d4c661 100644 --- a/lib/Parser.js +++ b/lib/Parser.js @@ -745,7 +745,7 @@ function repeat(chr, len) { return s; } -function decodeBytes(buf, encoding, offset, mlen, state) { +function decodeBytes(buf, encoding, offset, mlen, pendoffset, state) { if (!jsencoding) jsencoding = require('../deps/encoding/encoding'); if (jsencoding.encodingExists(encoding)) { @@ -790,7 +790,8 @@ function decodeBytes(buf, encoding, offset, mlen, state) { // normal case where there are no previous partials and we successfully // decoded a single encoded word state.replaces.push({ - fromOffset: offset, + // we ignore linear whitespace between consecutive encoded words + fromOffset: state.consecutive ? pendoffset : offset, toOffset: offset + mlen, val: ret }); @@ -842,7 +843,6 @@ function decodeWords(str, state) { state.consecutive = (pendoffset > -1 ? RE_LWS_ONLY.test(str.substring(pendoffset, m.index)) : false); - pendoffset = m.index + m[0].length; if (m[2].toLowerCase() === 'q') { // q-encoding, similar to quoted-printable bytes = new Buffer(m[3].replace(RE_QENC, qEncReplacer), 'binary'); @@ -850,7 +850,8 @@ function decodeWords(str, state) { // base64 bytes = new Buffer(m[3], 'base64'); } - decodeBytes(bytes, m[1].toLowerCase(), m.index, m[0].length, state); + decodeBytes(bytes, m[1].toLowerCase(), m.index, m[0].length, pendoffset, state); + pendoffset = m.index + m[0].length; } // perform the actual replacements diff --git a/test/test-parse-header.js b/test/test-parse-header.js index 2109599..a762449 100644 --- a/test/test-parse-header.js +++ b/test/test-parse-header.js @@ -38,6 +38,12 @@ var CRLF = '\r\n'; expected: { subject: [ '测试题目与中国信 long subjects are not OK 12 345678901234567890123456789012345678901234567890123456789012345678901234567890' ] }, what: 'Folded header value (one adjacent, one non-adjacent MIME encoded-words)' }, + { source: ['Subject: =?UTF-8?Q?=E0=B9=84=E0=B8=97=E0=B8=A2_=E0=B9=84?=', CRLF, + ' ', CRLF, + ' =?UTF-8?Q?=E0=B8=97=E0=B8=A2_=E0=B9=84=E0=B8=97?= =?UTF-8?Q?=E0=B8=A2?=', CRLF], + expected: { subject: [ 'ไทย ไทย ไทย' ] }, + what: 'Folded header value (adjacent MIME encoded-words seperated by linear whitespace)' + }, // header with body { source: ['Subject: test subject', CRLF, 'X-Another-Header: test', CRLF,