From bda686499d6c7e6c37fd9201a25e5696484b5162 Mon Sep 17 00:00:00 2001
From: Sven Slootweg
Date: Mon, 28 May 2012 08:55:58 +0200
Subject: [PATCH] Skip parsing of message if headers cannot be decoded.

---
 parse | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/parse b/parse
index f0b2503..a272673 100755
--- a/parse
+++ b/parse
@@ -175,7 +175,12 @@ for email_file in file_list:
         timestamp = 0
         print "WARNING: Failed to determine unix timestamp for %s." % sha1_hash
 
-    new_row = (getheader(message['message-id']), getheader(message['from']), getheader(message['to']), getheader(subject), timestamp, textbody, htmlbody, sha1_hash)
+    try:
+        new_row = (getheader(message['message-id']), getheader(message['from']), getheader(message['to']), getheader(subject), timestamp, textbody, htmlbody, sha1_hash)
+    except UnicodeDecodeError:
+        print "ERROR: Failed parsing %s, headers could not be decoded." % sha1_hash
+        continue
+
     cursor.execute("INSERT INTO emails VALUES (?, ?, ?, ?, ?, ?, ?, ?)", new_row)
     print "Successfully parsed and inserted e-mail with SHA1 hash %s." % sha1_hash
 