You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
250 lines
8.7 KiB
250 lines
8.7 KiB
import os, argparse, hashlib, sqlite3, time, shutil, cgi, re, math
from datetime import datetime
# Command-line interface. Every switch stores a plain string and has a
# default, so the script can be started without any arguments.
parser = argparse.ArgumentParser(description='Renders static HTML pages and indexes from an SQLite database of emails and an attachment folder.')

# (flag, destination key, default value, help text) -- registered in this
# order, which is also the order they appear in the --help output.
for _flag, _dest, _default, _help in (
    ('-o', 'output_dir', 'rendered_files', 'path of the directory where rendered files should be stored'),
    ('-t', 'template_dir', 'templates', 'path where the template files are'),
    ('-i', 'title', 'Inbox', 'title for the rendered pages'),
    ('-d', 'database', 'emails.db', 'path of the database that should be used to render the e-mail files'),
    ('-a', 'attachment_dir', 'attachments', 'path where attachments are stored'),
    ('-n', 'per_page', '20', 'amount of entries per page'),
    ('-r', 'original_path', None, 'path where original .eml files can be found'),
):
    parser.add_argument(_flag, dest=_dest, action='store', default=_default, help=_help)

# `options` is the dict view of the parsed namespace used by the rest of the
# script; `per_page` is still a string here and is converted where it is used.
args = parser.parse_args()
options = vars(args)
# Original .eml files are only linked and copied when -r was given.
# Computed as a single boolean expression: the previous if-statement had lost
# its `else:` line, so the flag was overwritten with False in every case.
originals_available = options['original_path'] is not None
def format_size(num):
    """Return *num* (a byte count) as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit is reached, and is always shown with one decimal place, e.g.
    ``"512.0 bytes"``, ``"1.5KB"``, ``"3.0MB"``.  Anything of 1024 GB or more
    is reported in TB, however large it is.
    """
    for unit in (' bytes', 'KB', 'MB', 'GB'):
        if num < 1024.0:
            return "%3.1f%s" % (num, unit)
        # Float division keeps the fractional part on Python 2 as well.
        num /= 1024.0
    return "%3.1f%s" % (num, 'TB')
def chunk(iterable, chunksize, fillvalue=None):
    """Split a sequence into consecutive slices of at most *chunksize* items.

    Arguments:
        iterable:  a sliceable sequence with a length (list, tuple, string).
        chunksize: maximum number of items per slice; must be at least 1.
        fillvalue: accepted for interface compatibility only; the last slice
                   is simply shorter and is never padded.

    Returns a list of slices in the original order; an empty sequence yields
    an empty list.

    Raises ValueError when *chunksize* is smaller than 1 (a size of zero used
    to surface as a ZeroDivisionError from the chunk-count computation).
    """
    if chunksize < 1:
        raise ValueError("chunksize must be at least 1")

    # Slicing clamps the end index to the sequence length by itself, so the
    # final, shorter chunk needs no special case.
    return [iterable[start:start + chunksize]
            for start in range(0, len(iterable), chunksize)]
def render_index(email_list, title, identifier):
    """Write the paginated index pages for one ordering of the messages.

    Arguments:
        email_list: message tuples, already in the order they should be
                    listed.  Read by position: [0] SHA1 hash, [3] sender,
                    [4] recipient, [5] subject, [6] formatted date,
                    [7] attachment count, [10] file name of the message page.
        title:      description of the ordering, passed to the template
                    as 'page'.
        identifier: short name of the ordering, used in the file names.

    One file ``<output_dir>/index_<identifier>_<n>.html`` is written per page,
    with n counted from 0 and ``options['per_page']`` rows on each page.
    Nothing is written when *email_list* is empty.
    """
    pages = chunk(email_list, int(options['per_page']))
    page_count = len(pages)
    row_template = '<tr class="clickable" data-url="messages/%s"><td>%s...</td><td>%s</td><td>%s</td><td><a href="messages/%s">%s</a></td><td>%s</td><td>%s</td></tr>'

    for current_page, list_chunk in enumerate(pages):
        rows = "".join(
            row_template % (message[10], message[0][:8], message[3], message[4],
                            message[10], message[5], message[6], message[7])
            for message in list_chunk
        )
        variables = {
            'page': title,
            'pagenum': "Page %d of %d" % (current_page + 1, page_count),
            'title': options['title'],
            'index': "../index.html",
            'items': rows,
        }

        filename = "%s/index_%s_%d.html" % (options['output_dir'], identifier, current_page)
        generated = template_index % variables

        # The page is encoded by hand, so the file is opened in binary mode,
        # and the with-block closes the handle deterministically.
        with open(filename, 'wb') as index_file:
            index_file.write(generated.encode('UTF-8'))

        print("Wrote page %d for %s." % (current_page, identifier))
if not os.path.isfile(options['database']):
    print("Database file not found. Use the -d switch to specify a custom database path.")
    # Stop here: sqlite3.connect() would otherwise create a new, empty
    # database at that path and the first query would fail on a missing table.
    raise SystemExit(1)

# Connect to database
database = sqlite3.connect(options['database'])
cursor = database.cursor()

# Load templates (with-blocks so the handles are closed again right away)
with open('%s/message.html' % options['template_dir']) as template_file:
    template_message = template_file.read()
with open('%s/index.html' % options['template_dir']) as template_file:
    template_index = template_file.read()

# Create output directory if necessary
try:
    os.makedirs("%s/messages" % options['output_dir'])
except OSError:
    # An already existing directory is fine; anything else (permissions,
    # a regular file in the way, ...) is a real error and must not be hidden.
    if not os.path.isdir("%s/messages" % options['output_dir']):
        raise
def _write_message_page(sha1_hash, suffix, fields):
    """Fill the message template with *fields* and write messages/<hash>_<suffix>.html."""
    generated = template_message % fields
    path = '%s/messages/%s_%s.html' % (options['output_dir'], sha1_hash, suffix)
    # Encoded by hand, hence binary mode; the with-block closes the handle.
    with open(path, 'wb') as page:
        page.write(generated.encode('UTF-8'))


# Render one page per available body (plaintext and/or HTML) for every stored
# message and collect one tuple per message for the index pages:
# (hash, has_text, has_html, sender, recipient, subject, date, attachment
#  count, snippet, unix time, page file name)
email_list = []

# A single cursor serves all attachment lookups: each result set is consumed
# completely before the next execute() call.
attachment_cursor = database.cursor()

for message_id, sender, recipient, subject, unixtime, textbody, htmlbody, sha1_hash in cursor.execute("SELECT * FROM emails"):
    sender = cgi.escape(sender, True)
    recipient = cgi.escape(recipient, True)
    subject = cgi.escape(subject, True)
    message_id = cgi.escape(message_id, True)
    # NOTE(review): %e is a platform strftime extension, not one of the
    # directives Python documents as portable -- confirm on the target system.
    timestamp = datetime.fromtimestamp(unixtime).strftime("%A %B %e, %Y %H:%M:%S")

    # Treat NULL bodies like empty ones instead of rendering the text "None".
    textbody = textbody or ""
    htmlbody = htmlbody or ""

    attachment_list = []

    for attachment_hash, attachment_filename, attachment_type, message_hash, attachment_size in attachment_cursor.execute("SELECT * FROM attachments WHERE `message_hash` = ?", (sha1_hash,)):
        attachment_extension = os.path.splitext(attachment_filename)[1]
        attachment_file = "%s%s" % (attachment_hash, attachment_extension)
        # File name and type come from the e-mail itself, so they are escaped
        # like the header fields above before being placed into markup.
        attachment_list.append('<a href="../attachments/%s" class="attachment">%s (%s, %s)</a>' % (
            cgi.escape(attachment_file, True),
            cgi.escape(attachment_filename, True),
            cgi.escape(attachment_type or "", True),
            format_size(attachment_size),
        ))

    if len(attachment_list) > 0:
        attachments = "".join(attachment_list)
        print("ATTACHMENTS %s" % sha1_hash)
    else:
        attachments = '<div class="attachment light">No attachments.</div>'

    available_html = htmlbody != ""
    # A message without any body still gets an (empty) plaintext page, so the
    # index always has a valid page to link to.
    available_text = textbody != "" or not available_html

    # Ordered list of (label, href) pairs, so the links always appear in the
    # same order.
    versions = []
    if available_text:
        versions.append(('Plaintext', '%s_text.html' % sha1_hash))
    if available_html:
        versions.append(('HTML', '%s_html.html' % sha1_hash))
    if originals_available:
        versions.append(('Original', '../original/%s.eml' % sha1_hash))

    version_list = "".join('<a href="%s" class="version">%s</a>' % (href, label) for label, href in versions)

    # Template values shared by both renderings of the message.
    page_fields = {
        'subject': subject,
        'date': timestamp,
        'from': sender,
        'to': recipient,
        'title': options['title'],
        'index': "../index.html",
        'versions': version_list,
        'attachments': attachments,
    }

    if available_text:
        # Text version; the body is escaped because it is raw message text
        # placed inside markup.
        _write_message_page(sha1_hash, 'text', dict(
            page_fields,
            body="<pre>%s</pre>" % cgi.escape(textbody),
            version="Plaintext version",
        ))
        print("Successfully generated plaintext version of %s." % sha1_hash)

    if available_html:
        # HTML version; the body is inserted as stored.
        # NOTE(review): this embeds sender-controlled HTML unchanged -- confirm
        # that is intended for where the archive is published.
        _write_message_page(sha1_hash, 'html', dict(
            page_fields,
            body=htmlbody,
            version="HTML version",
        ))
        print("Successfully generated HTML version of %s." % sha1_hash)

    # Up to 200 leading characters of the text body, cut at a word boundary.
    # The snippet is stored unescaped; escape it before using it in markup.
    snippet_match = re.search(r"^(.{0,200})\b", textbody, re.DOTALL)
    if snippet_match is not None:
        snippet = "%s..." % snippet_match.group(1)
    else:
        snippet = ""

    # The index links to the HTML rendering when there is one.
    if available_html:
        page_name = "%s_html.html" % sha1_hash
    else:
        page_name = "%s_text.html" % sha1_hash

    email_list.append((sha1_hash, available_text, available_html, sender, recipient, subject, timestamp, len(attachment_list), snippet, unixtime, page_name))
# Write one ascending and one descending index per sortable column.
# Each entry: (position in the message tuple, compare case-insensitively,
#              identifier prefix, ascending title, descending title)
for _field, _fold_case, _slug, _title_asc, _title_desc in (
    (9, False, "date", "Sorted from old to new", "Sorted from new to old"),
    (3, True, "from", "Sorted by sender, ascending", "Sorted by sender, descending"),
    (4, True, "to", "Sorted by recipient, ascending", "Sorted by recipient, descending"),
    (0, True, "hash", "Sorted by SHA1 hash, ascending", "Sorted by SHA1 hash, descending"),
    (5, True, "subject", "Sorted by subject, ascending", "Sorted by subject, descending"),
):
    # The position is bound as a default argument so each key function keeps
    # its own column.
    if _fold_case:
        _sort_key = lambda email, position=_field: email[position].lower()
    else:
        _sort_key = lambda email, position=_field: email[position]

    sorted_list = sorted(email_list, key=_sort_key)
    render_index(sorted_list, _title_asc, "%s_asc" % _slug)

    # The descending pages need their own, reversed ordering; previously they
    # were rendered from the ascending list and merely duplicated it.
    sorted_list = sorted(email_list, key=_sort_key, reverse=True)
    render_index(sorted_list, _title_desc, "%s_desc" % _slug)
# Copy the stylesheet that belongs to the templates next to the rendered pages.
shutil.copy('%s/style.css' % options['template_dir'], '%s/style.css' % options['output_dir'])

if originals_available:
    # Copy all original email messages into a subfolder of the output directory
    try:
        shutil.copytree(options['original_path'], '%s/original' % options['output_dir'])
    except OSError:
        print("ERROR: Could not copy original e-mail files. Ensure the original/ directory does NOT exist in your output directory yet.")
    else:
        print("Original e-mail files copied.")

# Attachments are copied regardless of whether originals were supplied.
try:
    shutil.copytree(options['attachment_dir'], '%s/attachments' % options['output_dir'])
except OSError:
    print("ERROR: Could not copy attachments. Ensure the attachments/ directory does NOT exist in your output directory yet.")
else:
    print("Attachments copied.")