Add option to ignore missing message ID

master
Sven Slootweg 12 years ago
parent 132dfc9b7e
commit 633abafb7b

15
parse

@ -16,6 +16,9 @@ parser.add_argument('-a', dest='attachment_dir', action='store', default='attach
parser.add_argument('-f', '--forced', dest='forced', action='store_true',
help='force insertion into database, even if entries already exist')
parser.add_argument('-I', '--ignore-invalid', dest='ignore_invalid', action='store_true',
help='process invalid e-mail files anyway, for example when missing message-id headers')
args = parser.parse_args()
options = vars(args)
@ -100,8 +103,16 @@ for email_file in file_list:
message = email.message_from_file(open(email_file, 'r'))
if message['message-id'] is None:
if options['ignore_invalid'] == True:
message_id = ""
print "WARNING: %s does not contain a valid message-id header. Empty message-id assumed." % email_file
else:
print "%s is not a valid e-mail file." % email_file
finished += 1
continue
else:
message_id = message['message-id']
if 'subject' not in message or message['subject'] is None:
subject = ""
else:
@ -111,7 +122,7 @@ for email_file in file_list:
htmlbody = ""
attachment_list = []
sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message['message-id'], subject)).hexdigest()
sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message_id, subject)).hexdigest()
if options['forced'] == False:
cursor.execute("SELECT * FROM emails WHERE `Hash` = ?", (sha1_hash,))
@ -176,7 +187,7 @@ for email_file in file_list:
print "WARNING: Failed to determine unix timestamp for %s." % sha1_hash
try:
new_row = (getheader(message['message-id']), getheader(message['from']), getheader(message['to']), getheader(subject), timestamp, textbody, htmlbody, sha1_hash)
new_row = (getheader(message_id), getheader(message['from']), getheader(message['to']), getheader(subject), timestamp, textbody, htmlbody, sha1_hash)
except UnicodeDecodeError:
print "ERROR: Failed parsing %s, headers could not be decoded." % sha1_hash
continue

@ -7,6 +7,9 @@ parser = argparse.ArgumentParser(description='Parses emails into an SQLite datab
parser.add_argument('-p', dest='pattern', action='store', default='*',
help='glob pattern (including path) that has to be matched for a file to be parsed')
parser.add_argument('-I', '--ignore-invalid', dest='ignore_invalid', action='store_true',
help='process invalid e-mail files anyway, for example when missing message-id headers')
args = parser.parse_args()
options = vars(args)
@ -62,14 +65,22 @@ for email_file in file_list:
message = email.message_from_file(open(email_file, 'r'))
if message['message-id'] is None:
if options['ignore_invalid'] == True:
message_id = ""
print "WARNING: %s does not contain a valid message-id header. Empty message-id assumed." % email_file
else:
print "%s is not a valid e-mail file." % email_file
finished += 1
continue
else:
message_id = message['message-id']
if 'subject' not in message or message['subject'] is None:
subject = ""
else:
subject = message['subject']
sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message['message-id'], subject)).hexdigest()
sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message_id, subject)).hexdigest()
new_path = "%s/%s.eml" % (os.path.dirname(email_file), sha1_hash)

Loading…
Cancel
Save