emailparser/parse

41 lines
1.3 KiB
Plaintext
Raw Normal View History

2012-05-27 05:09:27 +02:00
#!/usr/bin/python
2012-05-27 05:30:36 +02:00
import os, argparse, hashlib, email, glob, sqlite3
2012-05-27 07:00:46 +02:00
# Report the version string exposed by the sqlite3 module at start-up.
print(sqlite3.version)
2012-05-27 05:25:48 +02:00
# Command-line interface: which files to parse, and whether to render HTML.
parser = argparse.ArgumentParser(
    description='Parses emails into an SQLite database, and optionally renders static HTML files.'
)

# -p/--pattern: glob selecting the input files; defaults to '*'.
parser.add_argument(
    '-p', '--pattern',
    dest='pattern',
    action='store',
    default='*',
    help='glob pattern (including path) that has to be matched for a file to be parsed'
)

# -r/--render: boolean switch, False unless the flag is given.
parser.add_argument(
    '-r', '--render',
    dest='render',
    action='store_true',
    help='render static HTML files using the template files in templates/'
)

# Parse the process arguments and expose them as a plain dict as well, keyed
# by the `dest` names above ('pattern', 'render').
args = parser.parse_args()
options = vars(args)
2012-05-27 07:00:46 +02:00
# Open (or create) 'emails.db' relative to the current working directory and
# keep a cursor for issuing statements. The underlying connection is not bound
# to a name of its own; it remains reachable as `database.connection`.
database = sqlite3.connect('emails.db').cursor()

# Make sure the `emails` table exists. IF NOT EXISTS keeps this idempotent
# across runs without catching sqlite3.OperationalError, which would also hide
# unrelated failures (locked or unreadable database file, SQL errors, ...).
database.execute("CREATE TABLE IF NOT EXISTS emails (`message_id`, `from`, `to`, `subject`, `date`, `body`, `html`, `hash`)")
2012-05-27 06:46:08 +02:00
file_list = glob.glob(options['pattern'])
for email_file in file_list:
message = email.message_from_file(open(email_file))
if message['message-id'] is None:
print "%s is not a valid e-mail file." % email_file
else:
if 'subject' not in message or message['subject'] is None:
subject = ""
else:
subject = message['subject']
sha1_hash = hashlib.sha1("%s/%s/%s/%s" % (message['from'], message['to'], message['message-id'], subject)).hexdigest()
2012-05-27 07:00:46 +02:00