Parser: fix header parsing so that folded whitespace between adjacent MIME encoded-words is ignored

11 years ago · 4729de2dab
parent 0662d18f50
commit 4729de2dab
2 changed files with 78 additions and 7 deletions
--- a/lib/Parser.js
+++ b/lib/Parser.js
@ -21,6 +21,8 @@ var CH_LF = 10,
    RE_CRLF = /\r\n/g,
    RE_HDR = /^([^:]+):[ \t]?(.+)?$/,
    RE_ENCWORD = /=\?([^?]*?)\?([qb])\?(.*?)\?=/gi,
+    RE_ENCWORD_END = /=\?([^?]*?)\?([qb])\?(.*?)\?=$/i,
+    RE_ENCWORD_BEGIN = /^[ \t]=\?([^?]*?)\?([qb])\?(.*?)\?=/i,
    RE_QENC = /(?:=([a-fA-F0-9]{2}))|_/g,
    RE_SEARCH_MODSEQ = /^(.+) \(MODSEQ (.+?)\)$/i;

@ -713,22 +715,28 @@ function parseHeader(str, noDecode) {
  var lines = str.split(RE_CRLF),
      len = lines.length,
      header = {},
-      m, h;
+      m, h, val;

  for (var i = 0; i < len; ++i) {
    if (lines[i].length === 0)
      continue;
    if (lines[i][0] === '\t' || lines[i][0] === ' ') {
-      if (!noDecode)
-        lines[i] = decodeWords(lines[i]);
      // folded header content
-      // RFC2822 says to just remove the CRLF and not the whitespace following
-      // it, so we follow the RFC and include the leading whitespace ...
-      header[h][header[h].length - 1] += lines[i];
+      val = lines[i];
+      if (!noDecode) {
+        if (RE_ENCWORD_END.test(lines[i - 1])
+            && RE_ENCWORD_BEGIN.test(val)) {
+          // RFC2047 says to *ignore* leading whitespace in folded header values
+          // for adjacent encoded-words ...
+          val = val.substring(1);
+        }
+        val = decodeWords(val);
+      }
+      header[h][header[h].length - 1] += val;
    } else {
      m = RE_HDR.exec(lines[i]);
      if (m) {
-        h = m[1].toLowerCase();
+        h = m[1].toLowerCase().trim();
        if (m[2]) {
          if (!noDecode)
            m[2] = decodeWords(m[2]);
--- a/test/test-parse-header.js
+++ b/test/test-parse-header.js
@ -0,0 +1,63 @@
+var parseHeader = require('../lib/Parser').parseHeader;
+
+var assert = require('assert'),
+    inspect = require('util').inspect;
+
+// Line terminator used to assemble the raw header text of each case below.
+var CRLF = '\r\n';
+
+// Table-driven cases for parseHeader(). Each entry provides:
+//   source   - chunks of raw header text, joined with no separator before
+//              being parsed
+//   expected - the object parseHeader() must return for that text
+//   what     - label used to identify the case in failure messages
+[
+  { source: ['To: Foo', CRLF,
+             ' Bar Baz', CRLF],
+    expected: { to: [ 'Foo Bar Baz' ] },
+    what: 'Folded header value (plain -- space)'
+  },
+  { source: ['To: Foo', CRLF,
+             '\tBar\tBaz', CRLF],
+    expected: { to: [ 'Foo\tBar\tBaz' ] },
+    what: 'Folded header value (plain -- tab)'
+  },
+  { source: ['Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=', CRLF],
+    expected: { subject: [ '¡Hola, señor!' ] },
+    what: 'MIME encoded-word in value'
+  },
+  { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF,
+             ' =?GB2312?Q?345678901234567890123456789012345678901234567890123456789012?=', CRLF,
+             ' =?GB2312?Q?345678901234567890?=', CRLF],
+    expected: { subject: [ '测试题目与中国信 long subjects are not OK 12345678901234567890123456789012345678901234567890123456789012345678901234567890' ] },
+    what: 'Folded header value (adjacent MIME encoded-words)'
+  },
+  { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF,
+             ' 3=?GB2312?Q?45678901234567890123456789012345678901234567890123456789012?=', CRLF,
+             ' 3=?GB2312?Q?45678901234567890?=', CRLF],
+    expected: { subject: [ '测试题目与中国信 long subjects are not OK 12 345678901234567890123456789012345678901234567890123456789012 345678901234567890' ] },
+    what: 'Folded header value (non-adjacent MIME encoded-words)'
+  },
+  { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF,
+             ' 3=?GB2312?Q?45678901234567890123456789012345678901234567890123456789012?=', CRLF,
+             ' =?GB2312?Q?345678901234567890?=', CRLF],
+    expected: { subject: [ '测试题目与中国信 long subjects are not OK 12 345678901234567890123456789012345678901234567890123456789012345678901234567890' ] },
+    what: 'Folded header value (one adjacent, one non-adjacent MIME encoded-words)'
+  },
+].forEach(function(v) {
+  var result;
+
+  try {
+    result = parseHeader(v.source.join(''));
+  } catch (e) {
+    // An exception from the parser is a test failure. Only logging it would
+    // let the run finish successfully and hide the regression, so fail the
+    // assertion with the labelled stack trace instead.
+    assert.ok(false, makeMsg(v.what, 'JS Exception: ' + e.stack));
+  }
+
+  assert.deepEqual(result,
+                   v.expected,
+                   makeMsg(v.what,
+                           'Result mismatch:'
+                           + '\nParsed: ' + inspect(result, false, 10)
+                           + '\nExpected: ' + inspect(v.expected, false, 10)
+                   )
+                  );
+});
+
+// Prefixes `msg` with the bracketed test-case label `what`, producing
+// "[<what>]: <msg>" for use in assertion and exception messages.
+function makeMsg(what, msg) {
+  var label = '[' + what + ']';
+  return label + ': ' + msg;
+}