|
|
|
@ -16,6 +16,9 @@ parser.add_argument('-a', dest='attachment_dir', action='store', default='attach
|
|
|
|
|
# -f/--forced: insert rows even when a matching entry is already stored.
parser.add_argument(
    '-f', '--forced',
    action='store_true',
    dest='forced',
    help='force insertion into database, even if entries already exist')

# -I/--ignore-invalid: keep processing files that fail validation
# (for example e-mails that lack a message-id header).
parser.add_argument(
    '-I', '--ignore-invalid',
    action='store_true',
    dest='ignore_invalid',
    help='process invalid e-mail files anyway, for example when missing message-id headers')

# Parse the command line once and expose the result as a plain dict so the
# rest of the script can look settings up by key (options['forced'], ...).
args = parser.parse_args()
options = vars(args)
|
|
|
|
|
|
|
|
|
@ -100,8 +103,16 @@ for email_file in file_list:
|
|
|
|
|
message = email.message_from_file(open(email_file, 'r'))
|
|
|
|
|
|
|
|
|
|
if message['message-id'] is None:
|
|
|
|
|
if options['ignore_invalid'] == True:
|
|
|
|
|
message_id = ""
|
|
|
|
|
print "WARNING: %s does not contain a valid message-id header. Empty message-id assumed." % email_file
|
|
|
|
|
else:
|
|
|
|
|
print "%s is not a valid e-mail file." % email_file
|
|
|
|
|
finished += 1
|
|
|
|
|
continue
|
|
|
|
|
else:
|
|
|
|
|
message_id = message['message-id']
|
|
|
|
|
|
|
|
|
|
if 'subject' not in message or message['subject'] is None:
|
|
|
|
|
subject = ""
|
|
|
|
|
else:
|
|
|
|
@ -111,7 +122,7 @@ for email_file in file_list:
|
|
|
|
|
# Per-message accumulators, reset for every e-mail file.
htmlbody = ""
attachment_list = []

# Fingerprint of the message built from sender, recipient, message-id and
# subject. The validated message_id is used (not the raw header, which may be
# None), and the hash is computed exactly once: the earlier duplicate
# computation from message['message-id'] was immediately overwritten.
sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message_id, subject)).hexdigest()
|
|
|
|
|
|
|
|
|
|
if options['forced'] == False:
|
|
|
|
|
cursor.execute("SELECT * FROM emails WHERE `Hash` = ?", (sha1_hash,))
|
|
|
|
@ -176,7 +187,7 @@ for email_file in file_list:
|
|
|
|
|
print "WARNING: Failed to determine unix timestamp for %s." % sha1_hash
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
new_row = (getheader(message['message-id']), getheader(message['from']), getheader(message['to']), getheader(subject), timestamp, textbody, htmlbody, sha1_hash)
|
|
|
|
|
new_row = (getheader(message_id), getheader(message['from']), getheader(message['to']), getheader(subject), timestamp, textbody, htmlbody, sha1_hash)
|
|
|
|
|
except UnicodeDecodeError:
|
|
|
|
|
print "ERROR: Failed parsing %s, headers could not be decoded." % sha1_hash
|
|
|
|
|
continue
|
|
|
|
|