Merge pull request #351 from wavify/mime-decode-lws

Parser: ignore linear whitespace between consecutive mime-encoded words in header
10 years ago · 2e7196e477
parent 26aede8408 8249a257c7
commit 2e7196e477
2 changed files with 11 additions and 4 deletions
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -744,7 +744,7 @@ function repeat(chr, len) {
  return s;
 }

-function decodeBytes(buf, encoding, offset, mlen, state) {
+function decodeBytes(buf, encoding, offset, mlen, pendoffset, state) {
  if (!jsencoding)
    jsencoding = require('../deps/encoding/encoding');
  if (jsencoding.encodingExists(encoding)) {
@@ -789,7 +789,8 @@ function decodeBytes(buf, encoding, offset, mlen, state) {
        // normal case where there are no previous partials and we successfully
        // decoded a single encoded word
        state.replaces.push({
-          fromOffset: offset,
+          // we ignore linear whitespace between consecutive encoded words
+          fromOffset: state.consecutive ? pendoffset : offset,
          toOffset: offset + mlen,
          val: ret
        });
@@ -841,7 +842,6 @@ function decodeWords(str, state) {
    state.consecutive = (pendoffset > -1
                         ? RE_LWS_ONLY.test(str.substring(pendoffset, m.index))
                         : false);
-    pendoffset = m.index + m[0].length;
    if (m[2].toLowerCase() === 'q') {
      // q-encoding, similar to quoted-printable
      bytes = new Buffer(m[3].replace(RE_QENC, qEncReplacer), 'binary');
@@ -849,7 +849,8 @@ function decodeWords(str, state) {
      // base64
      bytes = new Buffer(m[3], 'base64');
    }
-    decodeBytes(bytes, m[1].toLowerCase(), m.index, m[0].length, state);
+    decodeBytes(bytes, m[1].toLowerCase(), m.index, m[0].length, pendoffset, state);
+    pendoffset = m.index + m[0].length;
  }

  // perform the actual replacements
--- a/test/test-parse-header.js
+++ b/test/test-parse-header.js
@@ -46,6 +46,12 @@ var CRLF = '\r\n';
    expected: { subject: [ '测试题目与中国信 long subjects are not OK 12 345678901234567890123456789012345678901234567890123456789012345678901234567890' ] },
    what: 'Folded header value (one adjacent, one non-adjacent MIME encoded-words)'
  },
+  { source: ['Subject: =?UTF-8?Q?=E0=B9=84=E0=B8=97=E0=B8=A2_=E0=B9=84?=', CRLF,
+             '   ', CRLF,
+             ' =?UTF-8?Q?=E0=B8=97=E0=B8=A2_=E0=B9=84=E0=B8=97?=  =?UTF-8?Q?=E0=B8=A2?=', CRLF],
+    expected: { subject: [ 'ไทย ไทย ไทย' ] },
+    what: 'Folded header value (adjacent MIME encoded-words seperated by linear whitespace)'
+  },
  // header with body
  { source: ['Subject: test subject', CRLF,
             'X-Another-Header: test', CRLF,