2012-05-27 05:09:27 +02:00
|
|
|
#!/usr/bin/python
|
|
|
|
|
2012-05-27 07:31:37 +02:00
|
|
|
import os, argparse, hashlib, email, email.header, glob, sqlite3
|
|
|
|
|
|
|
|
def getheader(header_text, default="ascii"):
    """Decode a raw (possibly RFC 2047 encoded) header into one unicode string.

    header_text -- raw header value as returned by ``message[...]``; may be
                   None when the header is absent from the message.
    default     -- charset used for sections that carry no charset of their
                   own, and as the fallback when a declared charset is not
                   known to Python.

    Returns the decoded sections concatenated without a separator. A missing
    header (None) yields an empty string.
    """
    # message[...] returns None for absent headers; without this guard the
    # literal text "None" would be decoded and stored instead.
    if header_text is None:
        return u""

    header_sections = []
    for text, charset in email.header.decode_header(header_text):
        # decode_header yields byte strings on Python 2; on Python 3 an
        # unencoded header comes back as already-decoded text.
        if isinstance(text, bytes):
            try:
                text = text.decode(charset or default, "replace")
            except LookupError:
                # Charset label unknown to Python: fall back to the default
                # rather than aborting the whole import over one header.
                text = text.decode(default, "replace")
        header_sections.append(text)
    return u"".join(header_sections)
|
|
|
|
|
2012-05-27 07:00:46 +02:00
|
|
|
# Print the `version` attribute of the sqlite3 module on startup.
print(sqlite3.version)

# Command-line interface: a glob pattern selecting the input files and a
# flag requesting static HTML rendering.
parser = argparse.ArgumentParser(
    description='Parses emails into an SQLite database, and optionally renders static HTML files.',
)
parser.add_argument(
    '-p', '--pattern',
    dest='pattern',
    action='store',
    default='*',
    help='glob pattern (including path) that has to be matched for a file to be parsed',
)
parser.add_argument(
    '-r', '--render',
    dest='render',
    action='store_true',
    help='render static HTML files using the template files in templates/',
)

# Keep both the parsed namespace and a plain-dict view of it.
args = parser.parse_args()
options = vars(args)
|
|
|
|
|
2012-05-27 07:20:01 +02:00
|
|
|
# Open the SQLite database file `emails.db` (path is relative, so it lives in
# the current working directory) and obtain a cursor for the statements below.
database = sqlite3.connect('emails.db')
cursor = database.cursor()

# Create the table on first run. IF NOT EXISTS makes this a no-op when the
# table is already there, while still letting genuine operational errors
# (locked or unwritable database, I/O failure) surface instead of being
# silently swallowed.
cursor.execute("CREATE TABLE IF NOT EXISTS emails (`message_id`, `from`, `to`, `subject`, `date`, `body`, `html`, `hash`)")
|
|
|
|
|
2012-05-27 06:46:08 +02:00
|
|
|
# Parse every file matched by the --pattern glob and insert one row per
# message into the `emails` table, then commit everything in one transaction.
file_list = glob.glob(options['pattern'])

for email_file in file_list:
    # The default pattern '*' also matches directories, which cannot be
    # opened as files; skip anything that is not a regular file so a stray
    # directory does not abort the run before the commit.
    if not os.path.isfile(email_file):
        continue

    # Close the handle as soon as the message is parsed instead of leaking
    # one open file per processed e-mail.
    with open(email_file) as email_handle:
        message = email.message_from_file(email_handle)

    # A file without a Message-ID header is reported and skipped.
    if message['message-id'] is None:
        print("%s is not a valid e-mail file." % email_file)
        continue

    # message[...] yields None for an absent header; treat a missing
    # Subject as an empty one.
    subject = message['subject']
    if subject is None:
        subject = ""

    # The hash is built from the raw (undecoded) From/To/Message-ID/Subject
    # header values.
    sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message['message-id'], subject)).hexdigest()

    # Date and bodies are not extracted here; constant placeholders are
    # stored in the `date`, `body` and `html` columns.
    timestamp = 0
    textbody = ""
    htmlbody = ""

    new_row = (
        getheader(message['message-id']),
        getheader(message['from']),
        getheader(message['to']),
        getheader(subject),
        timestamp,
        textbody,
        htmlbody,
        sha1_hash,
    )
    cursor.execute("INSERT INTO emails VALUES (?, ?, ?, ?, ?, ?, ?, ?)", new_row)
    print("Successfully parsed and inserted e-mail with SHA1 hash %s." % sha1_hash)

database.commit()
print("Changes successfully committed to database, exiting...")
|