Parser: fix premature decoding of encoded words ending with an incomplete multi-byte character

11 years ago · b630ed05ce
parent 397836e94c
commit b630ed05ce
2 changed files with 40 additions and 10 deletions
--- a/lib/Parser.js
+++ b/lib/Parser.js
@ -836,21 +836,46 @@ function decodeWords(str, state) {
  var pendoffset = -1;
  state.replaces = [];

-  var bytes, m, i, j, lenj, seq;
-  // generate replacement substrings and their positions
+  var bytes, m, i, j, leni, lenj, seq, replaces = [], lastReplace = {};
+  
+  // first, join consecutive encoded words sharing a charset and encoding so split multi-byte characters decode intact
  while (m = RE_ENCWORD.exec(str)) {
-    state.consecutive = (pendoffset > -1
-                         ? RE_LWS_ONLY.test(str.substring(pendoffset, m.index))
-                         : false);
-    if (m[2].toLowerCase() === 'q') {
+    seq = {
+      consecutive: (pendoffset > -1
+                    ? RE_LWS_ONLY.test(str.substring(pendoffset, m.index))
+                    : false),
+      charset: m[1].toLowerCase(),
+      encoding: m[2].toLowerCase(),
+      chunk: m[3],
+      index: m.index,
+      length: m[0].length,
+      pendoffset: pendoffset,
+    };
+    lastReplace = replaces.length && replaces[replaces.length - 1];
+    if (seq.consecutive
+        && seq.charset === lastReplace.charset
+        && seq.encoding === lastReplace.encoding) {
+      lastReplace.length += seq.length + seq.index - pendoffset;
+      lastReplace.chunk += seq.chunk;
+    } else {
+      replaces.push(seq);
+      lastReplace = seq;
+    }
+    pendoffset = m.index + m[0].length;
+  }
+
+  // generate replacement substrings and their positions
+  for (i = 0, leni = replaces.length; i < leni; ++i) {
+    m = replaces[i];
+    state.consecutive = m.consecutive;
+    if (m.encoding === 'q') {
      // q-encoding, similar to quoted-printable
-      bytes = new Buffer(m[3].replace(RE_QENC, qEncReplacer), 'binary');
+      bytes = new Buffer(m.chunk.replace(RE_QENC, qEncReplacer), 'binary');
    } else {
      // base64
-      bytes = new Buffer(m[3], 'base64');
+      bytes = new Buffer(m.chunk, 'base64');
    }
-    decodeBytes(bytes, m[1].toLowerCase(), m.index, m[0].length, pendoffset, state);
-    pendoffset = m.index + m[0].length;
+    decodeBytes(bytes, m.charset, m.index, m.length, m.pendoffset, state);
  }

  // perform the actual replacements
--- a/test/test-parse-header.js
+++ b/test/test-parse-header.js
@ -52,6 +52,11 @@ var CRLF = '\r\n';
    expected: { subject: [ 'ไทย ไทย ไทย' ] },
    what: 'Folded header value (adjacent MIME encoded-words seperated by linear whitespace)'
  },
+  { source: ['Subject: =?utf-8?Q?abcdefghij_=E0=B9=83=E0=B8=99_klmnopqr_=E0=B9=84=E0=B8=A1=E0=B9?=', CRLF,
+             ' =?utf-8?Q?=88=E0=B8=82=E0=B8=B6=E0=B9=89=E0=B8=99?=', CRLF],
+    expected: { subject: [ 'abcdefghij ใน klmnopqr ไม่ขึ้น' ] },
+    what: 'Folded header value (incomplete multi-byte character split)'
+  },
  // header with body
  { source: ['Subject: test subject', CRLF,
             'X-Another-Header: test', CRLF,