From b630ed05cef681c164d35934ab23782c64bfe03d Mon Sep 17 00:00:00 2001
From: Chotiwat Chawannakul <chotiwat@throughwave.co.th>
Date: Thu, 13 Mar 2014 20:02:27 +0700
Subject: [PATCH] Parser: fix premature decoding of encoded words ending with
 an incomplete multi-byte character

---
 lib/Parser.js             | 45 ++++++++++++++++++++++++++++++---------
 test/test-parse-header.js |  5 +++++
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 8a5e28f..a98cf58 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -836,21 +836,46 @@ function decodeWords(str, state) {
   var pendoffset = -1;
   state.replaces = [];
 
-  var bytes, m, i, j, lenj, seq;
-  // generate replacement substrings and their positions
+  var bytes, m, i, j, leni, lenj, seq, replaces = [], lastReplace = {};
+  
+  // first, join consecutive encoded words sharing a charset and encoding so split multi-byte characters decode whole
   while (m = RE_ENCWORD.exec(str)) {
-    state.consecutive = (pendoffset > -1
-                         ? RE_LWS_ONLY.test(str.substring(pendoffset, m.index))
-                         : false);
-    if (m[2].toLowerCase() === 'q') {
+    seq = {
+      consecutive: (pendoffset > -1
+                    ? RE_LWS_ONLY.test(str.substring(pendoffset, m.index))
+                    : false),
+      charset: m[1].toLowerCase(),
+      encoding: m[2].toLowerCase(),
+      chunk: m[3],
+      index: m.index,
+      length: m[0].length,
+      pendoffset: pendoffset,
+    };
+    lastReplace = replaces.length && replaces[replaces.length - 1];
+    if (seq.consecutive
+        && seq.charset === lastReplace.charset
+        && seq.encoding === lastReplace.encoding) {
+      lastReplace.length += seq.length + seq.index - pendoffset;
+      lastReplace.chunk += seq.chunk;
+    } else {
+      replaces.push(seq);
+      lastReplace = seq;
+    }
+    pendoffset = m.index + m[0].length;
+  }
+
+  // generate replacement substrings and their positions
+  for (i = 0, leni = replaces.length; i < leni; ++i) {
+    m = replaces[i];
+    state.consecutive = m.consecutive;
+    if (m.encoding === 'q') {
       // q-encoding, similar to quoted-printable
-      bytes = new Buffer(m[3].replace(RE_QENC, qEncReplacer), 'binary');
+      bytes = new Buffer(m.chunk.replace(RE_QENC, qEncReplacer), 'binary');
     } else {
       // base64
-      bytes = new Buffer(m[3], 'base64');
+      bytes = new Buffer(m.chunk, 'base64');
     }
-    decodeBytes(bytes, m[1].toLowerCase(), m.index, m[0].length, pendoffset, state);
-    pendoffset = m.index + m[0].length;
+    decodeBytes(bytes, m.charset, m.index, m.length, m.pendoffset, state);
   }
 
   // perform the actual replacements
diff --git a/test/test-parse-header.js b/test/test-parse-header.js
index 3c47db9..71df863 100644
--- a/test/test-parse-header.js
+++ b/test/test-parse-header.js
@@ -52,6 +52,11 @@ var CRLF = '\r\n';
     expected: { subject: [ 'ไทย ไทย ไทย' ] },
     what: 'Folded header value (adjacent MIME encoded-words seperated by linear whitespace)'
   },
+  { source: ['Subject: =?utf-8?Q?abcdefghij_=E0=B9=83=E0=B8=99_klmnopqr_=E0=B9=84=E0=B8=A1=E0=B9?=', CRLF,
+             ' =?utf-8?Q?=88=E0=B8=82=E0=B8=B6=E0=B9=89=E0=B8=99?=', CRLF],
+    expected: { subject: [ 'abcdefghij ใน klmnopqr ไม่ขึ้น' ] },
+    what: 'Folded header value (incomplete multi-byte character split)'
+  },
   // header with body
   { source: ['Subject: test subject', CRLF,
              'X-Another-Header: test', CRLF,