|
|
@ -90,8 +90,10 @@ for email_file in file_list:
|
|
|
|
# To save time when updating a database, let's first check whether the filename is already present in the database as a hash.
|
|
|
|
# To save time when updating a database, let's first check whether the filename is already present in the database as a hash.
|
|
|
|
# There is no need to check for the format here, since if the filename is not a valid hash, it simply won't match anything.
|
|
|
|
# There is no need to check for the format here, since if the filename is not a valid hash, it simply won't match anything.
|
|
|
|
sha1_hash = os.path.splitext(os.path.split(email_file)[1])[0]
|
|
|
|
sha1_hash = os.path.splitext(os.path.split(email_file)[1])[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if options['forced'] == False:
|
|
|
|
cursor.execute("SELECT * FROM emails WHERE `Hash` = ?", (sha1_hash,))
|
|
|
|
cursor.execute("SELECT * FROM emails WHERE `Hash` = ?", (sha1_hash,))
|
|
|
|
if len(cursor.fetchall()) > 0 and options['forced'] == False:
|
|
|
|
if len(cursor.fetchall()) > 0:
|
|
|
|
print "Skipping %s, already exists in the database according to filename." % sha1_hash
|
|
|
|
print "Skipping %s, already exists in the database according to filename." % sha1_hash
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
@ -111,8 +113,12 @@ for email_file in file_list:
|
|
|
|
|
|
|
|
|
|
|
|
sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message['message-id'], subject)).hexdigest()
|
|
|
|
sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message['message-id'], subject)).hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if options['forced'] == False:
|
|
|
|
cursor.execute("SELECT * FROM emails WHERE `Hash` = ?", (sha1_hash,))
|
|
|
|
cursor.execute("SELECT * FROM emails WHERE `Hash` = ?", (sha1_hash,))
|
|
|
|
if len(cursor.fetchall()) == 0 or options['forced'] == True:
|
|
|
|
if len(cursor.fetchall()) > 0:
|
|
|
|
|
|
|
|
print "Skipping %s, already exists in the database according to message hash." % sha1_hash
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
message_parts = [find_submessages(message)]
|
|
|
|
message_parts = [find_submessages(message)]
|
|
|
|
message_parts = flatten(message_parts)
|
|
|
|
message_parts = flatten(message_parts)
|
|
|
|
|
|
|
|
|
|
|
@ -172,20 +178,20 @@ for email_file in file_list:
|
|
|
|
new_row = (getheader(message['message-id']), getheader(message['from']), getheader(message['to']), getheader(subject), timestamp, textbody, htmlbody, sha1_hash)
|
|
|
|
new_row = (getheader(message['message-id']), getheader(message['from']), getheader(message['to']), getheader(subject), timestamp, textbody, htmlbody, sha1_hash)
|
|
|
|
cursor.execute("INSERT INTO emails VALUES (?, ?, ?, ?, ?, ?, ?, ?)", new_row)
|
|
|
|
cursor.execute("INSERT INTO emails VALUES (?, ?, ?, ?, ?, ?, ?, ?)", new_row)
|
|
|
|
print "Successfully parsed and inserted e-mail with SHA1 hash %s." % sha1_hash
|
|
|
|
print "Successfully parsed and inserted e-mail with SHA1 hash %s." % sha1_hash
|
|
|
|
else:
|
|
|
|
|
|
|
|
print "Skipping %s, already exists in the database." % sha1_hash
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if len(attachment_list) > 0:
|
|
|
|
if len(attachment_list) > 0:
|
|
|
|
inserted = 0
|
|
|
|
inserted = 0
|
|
|
|
|
|
|
|
|
|
|
|
for attachment in attachment_list:
|
|
|
|
for attachment in attachment_list:
|
|
|
|
|
|
|
|
if options['forced'] == False:
|
|
|
|
cursor.execute("SELECT * FROM attachments WHERE `Hash` = ?", (attachment[2],))
|
|
|
|
cursor.execute("SELECT * FROM attachments WHERE `Hash` = ?", (attachment[2],))
|
|
|
|
if len(cursor.fetchall()) == 0 or options['forced'] == True:
|
|
|
|
if len(cursor.fetchall()) > 0:
|
|
|
|
|
|
|
|
print "Skipping attachment %s, already exists in the database." % attachment[2]
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
new_row = (sha1_hash, attachment[0], attachment[1], attachment[2], attachment[3])
|
|
|
|
new_row = (sha1_hash, attachment[0], attachment[1], attachment[2], attachment[3])
|
|
|
|
cursor.execute("INSERT INTO attachments VALUES (?, ?, ?, ?, ?)", new_row)
|
|
|
|
cursor.execute("INSERT INTO attachments VALUES (?, ?, ?, ?, ?)", new_row)
|
|
|
|
inserted += 1
|
|
|
|
inserted += 1
|
|
|
|
else:
|
|
|
|
|
|
|
|
print "Skipping attachment %s, already exists in the database." % attachment[2]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if inserted > 0:
|
|
|
|
if inserted > 0:
|
|
|
|
print "Successfully inserted %d attachment(s) for %s." % (inserted, sha1_hash)
|
|
|
|
print "Successfully inserted %d attachment(s) for %s." % (inserted, sha1_hash)
|
|
|
|