From 4729de2dab3cc4dd45436cd2f236f06a78114da2 Mon Sep 17 00:00:00 2001 From: mscdex Date: Tue, 23 Jul 2013 10:33:15 -0400 Subject: [PATCH] Parser: fix header parsing so that folded whitespace between adjacent MIME encoded-words is ignored --- lib/Parser.js | 22 +++++++++----- test/test-parse-header.js | 63 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 test/test-parse-header.js diff --git a/lib/Parser.js b/lib/Parser.js index 3398059..35e37d8 100644 --- a/lib/Parser.js +++ b/lib/Parser.js @@ -21,6 +21,8 @@ var CH_LF = 10, RE_CRLF = /\r\n/g, RE_HDR = /^([^:]+):[ \t]?(.+)?$/, RE_ENCWORD = /=\?([^?]*?)\?([qb])\?(.*?)\?=/gi, + RE_ENCWORD_END = /=\?([^?]*?)\?([qb])\?(.*?)\?=$/i, + RE_ENCWORD_BEGIN = /^[ \t]=\?([^?]*?)\?([qb])\?(.*?)\?=/i, RE_QENC = /(?:=([a-fA-F0-9]{2}))|_/g, RE_SEARCH_MODSEQ = /^(.+) \(MODSEQ (.+?)\)$/i; @@ -713,22 +715,28 @@ function parseHeader(str, noDecode) { var lines = str.split(RE_CRLF), len = lines.length, header = {}, - m, h; + m, h, val; for (var i = 0; i < len; ++i) { if (lines[i].length === 0) continue; if (lines[i][0] === '\t' || lines[i][0] === ' ') { - if (!noDecode) - lines[i] = decodeWords(lines[i]); // folded header content - // RFC2822 says to just remove the CRLF and not the whitespace following - // it, so we follow the RFC and include the leading whitespace ... - header[h][header[h].length - 1] += lines[i]; + val = lines[i]; + if (!noDecode) { + if (RE_ENCWORD_END.test(lines[i - 1]) + && RE_ENCWORD_BEGIN.test(val)) { + // RFC2047 says to *ignore* leading whitespace in folded header values + // for adjacent encoded-words ... + val = val.substring(1); + } + val = decodeWords(val); + } + header[h][header[h].length - 1] += val; } else { m = RE_HDR.exec(lines[i]); if (m) { - h = m[1].toLowerCase(); + h = m[1].toLowerCase().trim(); if (m[2]) { if (!noDecode) m[2] = decodeWords(m[2]); diff --git a/test/test-parse-header.js b/test/test-parse-header.js new file mode 100644 index 0000000..9caf7de --- /dev/null +++ b/test/test-parse-header.js @@ -0,0 +1,63 @@ +var parseHeader = require('../lib/Parser').parseHeader; + +var assert = require('assert'), + inspect = require('util').inspect; + +var CRLF = '\r\n'; + +[ + { source: ['To: Foo', CRLF, + ' Bar Baz', CRLF], + expected: { to: [ 'Foo Bar Baz' ] }, + what: 'Folded header value (plain -- space)' + }, + { source: ['To: Foo', CRLF, + '\tBar\tBaz', CRLF], + expected: { to: [ 'Foo\tBar\tBaz' ] }, + what: 'Folded header value (plain -- tab)' + }, + { source: ['Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=', CRLF], + expected: { subject: [ '¡Hola, señor!' ] }, + what: 'MIME encoded-word in value' + }, + { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF, + ' =?GB2312?Q?345678901234567890123456789012345678901234567890123456789012?=', CRLF, + ' =?GB2312?Q?345678901234567890?=', CRLF], + expected: { subject: [ '测试题目与中国信 long subjects are not OK 12345678901234567890123456789012345678901234567890123456789012345678901234567890' ] }, + what: 'Folded header value (adjacent MIME encoded-words)' + }, + { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF, + ' 3=?GB2312?Q?45678901234567890123456789012345678901234567890123456789012?=', CRLF, + ' 3=?GB2312?Q?45678901234567890?=', CRLF], + expected: { subject: [ '测试题目与中国信 long subjects are not OK 12 345678901234567890123456789012345678901234567890123456789012 345678901234567890' ] }, + what: 'Folded header value (non-adjacent MIME encoded-words)' + }, + { source: ['Subject: =?GB2312?Q?=B2=E2=CA=D4=CC=E2=C4=BF=D3=EB=D6=D0=B9=FA=D0=C5_long_subjects_are_not_OK_12?=', CRLF, + ' 3=?GB2312?Q?45678901234567890123456789012345678901234567890123456789012?=', CRLF, + ' =?GB2312?Q?345678901234567890?=', CRLF], + expected: { subject: [ '测试题目与中国信 long subjects are not OK 12 345678901234567890123456789012345678901234567890123456789012345678901234567890' ] }, + what: 'Folded header value (one adjacent, one non-adjacent MIME encoded-words)' + }, +].forEach(function(v) { + var result; + + try { + result = parseHeader(v.source.join('')); + } catch (e) { + console.log(makeMsg(v.what, 'JS Exception: ' + e.stack)); + return; + } + + assert.deepEqual(result, + v.expected, + makeMsg(v.what, + 'Result mismatch:' + + '\nParsed: ' + inspect(result, false, 10) + + '\nExpected: ' + inspect(v.expected, false, 10) + ) + ); +}); + +function makeMsg(what, msg) { + return '[' + what + ']: ' + msg; +} \ No newline at end of file