From 1498020c2c912bf3833d7e7057042c02bfda553d Mon Sep 17 00:00:00 2001 From: mscdex Date: Mon, 1 Jul 2013 02:57:42 -0400 Subject: [PATCH] Parser: decode encoded-words by default in parseHeader() --- README.md | 2 +- lib/Parser.js | 35 ++++++++++++++++++++++++++++++++--- package.json | 1 + 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 58afddd..1321cd8 100644 --- a/README.md +++ b/README.md @@ -363,7 +363,7 @@ Connection Properties Connection Static Methods ------------------------- -* **parseHeader**(< _string_ >rawHeader) - _object_ - Parses a raw header and returns an object keyed on header fields and the values are Arrays of header field values. +* **parseHeader**(< _string_ >rawHeader[, < _boolean_ >disableAutoDecode]) - _object_ - Parses a raw header and returns an object keyed on header fields and the values are Arrays of header field values. Set `disableAutoDecode` to true to disable automatic decoding of MIME encoded-words that may exist in header field values. Connection Instance Methods diff --git a/lib/Parser.js b/lib/Parser.js index ce0cf7d..e8ecac5 100644 --- a/lib/Parser.js +++ b/lib/Parser.js @@ -2,7 +2,8 @@ var EventEmitter = require('events').EventEmitter, ReadableStream = require('stream').Readable || require('readable-stream'), inherits = require('util').inherits, inspect = require('util').inspect, - utf7 = require('utf7').imap; + utf7 = require('utf7').imap, + iconv = require('iconv-lite'); var CH_LF = 10, LITPLACEHOLDER = String.fromCharCode(0), @@ -17,7 +18,9 @@ var CH_LF = 10, RE_TAGGED = /^A(\d+) (OK|NO|BAD) (?:\[([^\]]+)\] )?(.+)$/i, RE_CONTINUE = /^\+ (?:\[([^\]]+)\] )?(.+)$/i, RE_CRLF = /\r\n/g, - RE_HDR = /^([^:]+):[ \t]?(.+)?$/; + RE_HDR = /^([^:]+):[ \t]?(.+)?$/, + RE_ENCWORD = /=\?([^?]*?)\?([qb])\?(.*?)\?=/gi, + RE_QENC = /(?:=([a-fA-F0-9]{2}))|_/g; function Parser(stream, debug) { if (!(this instanceof Parser)) @@ -637,7 +640,28 @@ function convStr(str, literals) { return str; } -function parseHeader(str) { +function decodeWords(str) { + return str.replace(RE_ENCWORD, + function(match, charset, encoding, word) { + encoding = encoding.toLowerCase(); + if (encoding === 'q') { + // q-encoding, similar to quoted-printable + return iconv.decode(new Buffer(word.replace(RE_QENC, + function(match2, byte) { + if (match2 === '_') + return ' '; + else + return String.fromCharCode(parseInt(byte, 16)); + } + ), 'binary'), charset); + } else { + // base64 + return iconv.decode(new Buffer(word, 'base64'), charset); + } + }); +} + +function parseHeader(str, noDecode) { var lines = str.split(RE_CRLF), len = lines.length, header = {}, @@ -647,6 +671,8 @@ function parseHeader(str) { if (lines[i].length === 0) continue; if (lines[i][0] === '\t' || lines[i][0] === ' ') { + if (!noDecode) + lines[i] = decodeWords(lines[i]); // folded header content // RFC2822 says to just remove the CRLF and not the whitespace following // it, so we follow the RFC and include the leading whitespace ... @@ -656,6 +682,9 @@ function parseHeader(str) { if (m) { h = m[1].toLowerCase(); if (m[2]) { + if (!noDecode) + m[2] = decodeWords(m[2]); + if (header[h] === undefined) header[h] = [m[2]]; else diff --git a/package.json b/package.json index d628a1b..9893020 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "main": "./lib/Connection", "dependencies": { "utf7": "1.0.0", + "iconv-lite": "0.2.10", "readable-stream": "1.0.2" }, "scripts": {