From 0c9baf755dbe547318cb3ee1bc158e5f3d1661c9 Mon Sep 17 00:00:00 2001 From: Brian White Date: Mon, 23 Jul 2012 00:14:28 -0400 Subject: [PATCH] Use node's built-in HTTP parser for headers or headers and bodies --- README.md | 6 ++-- lib/imap.js | 84 ++++++++++++++++++++++++++++++------------- lib/imap.parsers.js | 15 +------- lib/imap.utilities.js | 2 +- 4 files changed, 64 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 9b2767f..0f94af9 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ node-imap exposes one object: **ImapConnection**. * **seqno** - An Integer that designates this message's sequence number. This number changes when messages with smaller sequence numbers are deleted for example (see the ImapConnection's 'deleted' event). * **flags** - An Array containing the flags currently set on this message. * **date** - A String containing the internal server date for the message (always represented in GMT?) - * **headers** - An Object containing the headers of the message, **if headers were requested when calling fetch().** Note: The value of each property in the object is an Array containing the value(s) for that particular header name (just in case there are duplicate headers). + * **headers** - An Object containing the headers of the message, **if headers were requested when calling fetch().** Note: Duplicate headers are dealt with by storing the duplicated values in an array keyed on the header name (e.g. { to: ['foo@bar.com', 'bar@baz.com'] }). * **structure** - An Array containing the structure of the message, **if the structure was requested when calling fetch().** See below for an explanation of the format of this property. * Events: * **data**(String) - Emitted for each message body chunk if a message body is being fetched @@ -367,10 +367,10 @@ ImapConnection Functions * **fetch**(Integer/String/Array, Object) - _ImapFetch_ - Fetches the message(s) identified by the first parameter, in the currently open mailbox. 
The first parameter can either be an Integer for a single message ID, a String for a message ID range (e.g. '2504:2507' or '\*' or '2504:\*'), or an Array containing any number of the aforementioned Integers and/or Strings. The second (Object) parameter is a set of options used to determine how and what exactly to fetch. The valid options are: * **markSeen** - A Boolean indicating whether to mark the message(s) as read when fetching it. **Default:** false - * **request** - An Object indicating what to fetch (at least **headers** OR **body** must be set to false -- in other words, you can only fetch one aspect of the message at a time): + * **request** - An Object indicating what to fetch (normally at least one of **headers** and **body** must be set to false -- in other words, you can only fetch one aspect of the message at a time; the one exception is setting both to true, which is the same as fetching a 'full' body, but with parsed headers): * **struct** - A Boolean indicating whether to fetch the structure of the message. **Default:** true * **headers** - A Boolean/Array value. A value of true fetches all message headers. An Array containing specific message headers to retrieve can also be specified. **Default:** true - * **body** - A Boolean/String/Array value. A Boolean value of true fetches the entire raw message body. A String value containing a valid partID (see _FetchResult_'s structure property) fetches the entire body/content of that particular part, or a String value of 'full' fetches the entire email message, including the headers. An Array value of length 2 can be specified if you wish to request a byte range of the content, where the first item is a Boolean/String as previously described and the second item is a String indicating the byte range, for example, to fetch the first 500 bytes: '0-500'. **Default:** false + * **body** - A Boolean/String/Array value. A Boolean value of true fetches the entire raw message body. 
A String value containing a valid partID (see _FetchResult_'s structure property) fetches the entire body/content of that particular part, or a String value of 'full' fetches the entire email message, including the headers (unparsed). An Array value of length 2 can be specified if you wish to request a byte range of the content, where the first item is a Boolean/String as previously described and the second item is a String indicating the byte range, for example, to fetch the first 500 bytes: '0-500'. **Default:** false * **copy**(Integer/String/Array, String, Function) - _(void)_ - Copies the message(s) with the message ID(s) identified by the first parameter, in the currently open mailbox, to the mailbox specified by the second parameter. The first parameter can either be an Integer for a single message ID, a String for a message ID range (e.g. '2504:2507' or '\*' or '2504:\*'), or an Array containing any number of the aforementioned Integers and/or Strings. The Function parameter is the callback with one parameter: the error (null if none). diff --git a/lib/imap.js b/lib/imap.js index ebec556..eac31a5 100644 --- a/lib/imap.js +++ b/lib/imap.js @@ -1,6 +1,7 @@ var util = require('util'), Socket = require('net').Socket, - EventEmitter = require('events').EventEmitter; + EventEmitter = require('events').EventEmitter, + HTTPParser = process.binding('http_parser').HTTPParser; var parsers = require('./imap.parsers'), utils = require('./imap.utilities'); @@ -13,7 +14,9 @@ var CRLF = '\r\n', AUTH: 2, BOXSELECTING: 3, BOXSELECTED: 4 - }, BOX_ATTRIBS = ['NOINFERIORS', 'NOSELECT', 'MARKED', 'UNMARKED'], + }, + BOX_ATTRIBS = ['NOINFERIORS', 'NOSELECT', 'MARKED', 'UNMARKED'], + FAKE_HTTP_RESPONSE = new Buffer('HTTP/1.1 200 OK\r\n'); reFetch = /^\* (\d+) FETCH .+? 
\{(\d+)\}\r\n/; // extension constants @@ -194,10 +197,8 @@ ImapConnection.prototype.connect = function(loginCb) { } if (chunk && chunk.length) { - if (curReq._msgtype === 'headers') { - chunk.copy(self._state.curData, curReq.curPos, 0); - curReq.curPos += chunk.length; - } + if (curReq._useParser) + self._state.parser.execute(chunk, 0, chunk.length); else curReq._msg.emit('data', chunk); } @@ -205,8 +206,8 @@ ImapConnection.prototype.connect = function(loginCb) { if (curReq._done) { var restDesc; if (curReq._done === 1) { - if (curReq._msgtype === 'headers') - curReq._headers = self._state.curData.toString(); + if (curReq._useParser) + self._state.parser.finish(); self._state.curData = null; curReq._done = true; } @@ -223,8 +224,7 @@ ImapConnection.prototype.connect = function(loginCb) { restDesc[1] = ' ' + restDesc[1]; } else restDesc[1] = ''; - parsers.parseFetch(curReq._desc + restDesc[1], curReq._headers, - curReq._msg); + parsers.parseFetch(curReq._desc + restDesc[1], curReq._msg); var curData = self._state.curData; data = curData.slice(utils.bufferIndexOf(curData, CRLF) + 2); curReq._done = false; @@ -254,10 +254,38 @@ ImapConnection.prototype.connect = function(loginCb) { curReq._desc = desc; curReq._msg = msg; curReq._fetcher.emit('message', msg); - curReq._msgtype = (type.indexOf('HEADER') === 0 ? 
'headers' : 'body'); - if (curReq._msgtype === 'headers') { - self._state.curData = new Buffer(self._state.curExpected); - curReq.curPos = 0; + if (curReq._useParser) { + // use node's built-in HTTP parser for parsing headers or headers and + // bodies + if (self._state.parser) + self._state.parser.reinitialize(HTTPParser.RESPONSE); + else { + self._state.parser = new HTTPParser(HTTPParser.RESPONSE); + self._state.parser.onHeadersComplete = function(info) { + var headers = {}; + + for (var i=0,k,len=info.headers.length; i -1) { @@ -715,11 +743,10 @@ ImapConnection.prototype._fetch = function(which, uids, options) { markSeen: false, request: { struct: true, - headers: true, // \_______ at most one of these can be used for any given - // _______ fetch request - body: false // / + headers: true, + body: false } - }, toFetch, bodyRange = '', self = this; + }, toFetch, useParser = false, bodyRange = '', self = this; if (typeof options !== 'object') options = {}; utils.extend(true, opts, options); @@ -737,18 +764,22 @@ ImapConnection.prototype._fetch = function(which, uids, options) { + '>'; opts.request.body = opts.request.body[0]; } - if (typeof opts.request.headers === 'boolean' - && opts.request.headers === true) { + if (opts.request.headers === true && opts.request.body === true) { + // fetches the whole entire message (including the headers) + toFetch = ''; + useParser = true; + } else if (opts.request.headers === true) { // fetches headers only toFetch = 'HEADER'; - } else if (typeof opts.request.body === 'boolean' - && opts.request.body === true) { + useParser = true; + } else if (opts.request.body === true) { // fetches the whole entire message text (minus the headers), including // all message parts toFetch = 'TEXT'; } else if (typeof opts.request.body === 'string') { if (opts.request.body.toUpperCase() === 'FULL') { // fetches the whole entire message (including the headers) + // NOTE: does NOT parse the headers! 
toFetch = ''; } else if (/^([\d]+[\.]{0,1})*[\d]+$/.test(opts.request.body)) { // specific message part identifier, e.g. '1', '2', '1.1', '1.2', etc @@ -760,6 +791,7 @@ ImapConnection.prototype._fetch = function(which, uids, options) { // fetch specific headers only toFetch = 'HEADER.FIELDS (' + opts.request.headers.join(' ').toUpperCase() + ')'; + useParser = true; } var extensions = ''; @@ -782,8 +814,10 @@ ImapConnection.prototype._fetch = function(which, uids, options) { fetcher.emit('end'); } ); - var imapFetcher = new ImapFetch(); - this._state.requests[this._state.requests.length-1]._fetcher = imapFetcher; + var imapFetcher = new ImapFetch(), + req = this._state.requests[this._state.requests.length - 1]; + req._fetcher = imapFetcher; + req._useParser = useParser; return imapFetcher; }; diff --git a/lib/imap.parsers.js b/lib/imap.parsers.js index b4c3997..b942512 100644 --- a/lib/imap.parsers.js +++ b/lib/imap.parsers.js @@ -43,7 +43,7 @@ exports.parseNamespaces = function(str, namespaces) { } } -exports.parseFetch = function(str, literalData, fetchData) { +exports.parseFetch = function(str, fetchData) { var key, idxNext, result = exports.parseExpr(str); for (var i=0,len=result.length; i