#!/usr/bin/python import os, argparse, hashlib, sqlite3, time, shutil, cgi, re, math from datetime import datetime parser = argparse.ArgumentParser(description='Renders static HTML pages and indexes from an SQLite database of emails and an attachment folder.') parser.add_argument('-o', dest='output_dir', action='store', default='rendered_files', help='path of the directory where rendered files should be stored') parser.add_argument('-t', dest='template_dir', action='store', default='templates', help='path where the template files are') parser.add_argument('-i', dest='title', action='store', default='Inbox', help='title for the rendered pages') parser.add_argument('-d', dest='database', action='store', default='emails.db', help='path of the database that should be used to render the e-mail files') parser.add_argument('-a', dest='attachment_dir', action='store', default='attachments', help='path where attachments are stored') parser.add_argument('-n', dest='per_page', action='store', default='20', help='amount of entries per page') parser.add_argument('-r', dest='original_path', action='store', default=None, help='path where original .eml files can be found') args = parser.parse_args() options = vars(args) if options['original_path'] is not None: originals_available = True else: originals_available = False def format_size(num): for unit in [' bytes','KB','MB','GB']: if num < 1024.0: return "%3.1f%s" % (num, unit) num /= 1024.0 return "%3.1f%s" % (num, 'TB') def chunk(iterable, chunksize, fillvalue=None): result = [] num_chunks = int(math.ceil(len(iterable) / (chunksize * 1.0))) for i in xrange(0, num_chunks): start = (chunksize * i) if (chunksize * (i + 1)) > len(iterable): end = len(iterable) else: end = (chunksize * (i + 1)) result.append((iterable[start:end])) return result def render_index(email_list, title, identifier): email_list = chunk(email_list, int(options['per_page'])) current_page = 0 for list_chunk in email_list: variables = { 'page': title, 'pagenum': "Page %d of %d" % (current_page + 1, len(email_list)), 'title': options['title'], 'index': "../index.html", 'items': "".join('%s...%s%s%s%s%s' % (message[10], message[0][:8], message[3], message[4], message[10], message[5], message[6], message[7]) for message in list_chunk) } filename = "%s/index_%s_%d.html" % (options['output_dir'], identifier, current_page) generated = template_index % variables open(filename, 'w').write(generated.encode('UTF-8')) print "Wrote page %d for %s." % (current_page, identifier) current_page += 1 if os.path.isfile(options['database']) == False: print "Database file not found. Use the -d switch to specify a custom database path." exit(1) # Connect to database database = sqlite3.connect(options['database']) cursor = database.cursor() # Load templates template_message = open('%s/message.html' % options['template_dir']).read() template_index = open('%s/index.html' % options['template_dir']).read() # Create output directory if necessary try: os.makedirs("%s/messages" % options['output_dir']) except OSError: pass email_list = [] for message_id, sender, recipient, subject, unixtime, textbody, htmlbody, sha1_hash in cursor.execute("SELECT * FROM emails"): sender = cgi.escape(sender, True) recipient = cgi.escape(recipient, True) subject = cgi.escape(subject, True) message_id = cgi.escape(message_id, True) timestamp = datetime.fromtimestamp(unixtime).strftime("%A %B %e, %Y %H:%M:%S") attachment_list = [] attachment_cursor = database.cursor() for attachment_hash, attachment_filename, attachment_type, message_hash, attachment_size in attachment_cursor.execute("SELECT * FROM attachments WHERE `message_hash` = ?", (sha1_hash,)): attachment_extension = os.path.splitext(attachment_filename)[1] attachment_file = "%s%s" % (attachment_hash, attachment_extension) attachment_list.append('%s (%s, %s)' % (attachment_file, attachment_filename, attachment_type, format_size(attachment_size))) if len(attachment_list) > 0: attachments = "".join(attachment_list) print "ATTACHMENTS %s" % sha1_hash else: attachments = '
No attachments.
' versions = {} if textbody != "": available_text = True versions['Plaintext'] = '%s_text.html' % sha1_hash else: available_text = False if htmlbody != "": available_html = True versions['HTML'] = '%s_html.html' % sha1_hash else: available_html = False if originals_available: versions['Original'] = '../original/%s.eml' % sha1_hash version_list = "".join('%s' % (value, key) for key, value in versions.viewitems()) if available_text == True: # Text version variables = { 'subject': subject, 'date': timestamp, 'from': sender, 'to': recipient, 'body': "
%s
" % textbody, 'title': options['title'], 'version': "Plaintext version", 'index': "../index.html", 'versions': version_list, 'attachments': attachments } generated = template_message % variables open('%s/messages/%s_text.html' % (options['output_dir'], sha1_hash), 'w').write(generated.encode('UTF-8')) print "Successfully generated plaintext version of %s." % sha1_hash if available_html == True: # HTML version variables = { 'subject': subject, 'date': timestamp, 'from': sender, 'to': recipient, 'body': htmlbody, 'title': options['title'], 'version': "HTML version", 'index': "../index.html", 'versions': version_list, 'attachments': attachments } generated = template_message % variables open('%s/messages/%s_html.html' % (options['output_dir'], sha1_hash), 'w').write(generated.encode('UTF-8')) print "Successfully generated HTML version of %s." % sha1_hash try: snippet = "%s..." % re.search("^(.{0,200})\\b", textbody, re.DOTALL).group(1) except AttributeError: snippet = "" if available_html == True: page_name = "%s_html.html" % sha1_hash elif available_text == True: page_name = "%s_text.html" % sha1_hash else: continue email_list.append((sha1_hash, available_text, available_html, sender, recipient, subject, timestamp, len(attachment_list), snippet, unixtime, page_name)) # Sort by timestamp, ascending sorted_list = sorted(email_list, key=lambda email: email[9]) render_index(sorted_list, "Sorted from old to new", "date_asc") # Sort by timestamp, descending sorted_list.reverse() render_index(sorted_list, "Sorted from new to old", "date_desc") # Sort by sender, ascending sorted_list = sorted(email_list, key=lambda email: email[3].lower()) render_index(sorted_list, "Sorted by sender, ascending", "from_asc") # Sort by sender, descending sorted_list.reverse() render_index(sorted_list, "Sorted by sender, descending", "from_desc") # Sort by recipient, ascending sorted_list = sorted(email_list, key=lambda email: email[4].lower()) render_index(sorted_list, "Sorted by recipient, ascending", "to_asc") # Sort by recipient, descending sorted_list.reverse() render_index(sorted_list, "Sorted by recipient, descending", "to_desc") # Sort by hash, ascending sorted_list = sorted(email_list, key=lambda email: email[0].lower()) render_index(sorted_list, "Sorted by SHA1 hash, ascending", "hash_asc") # Sort by hash, descending sorted_list.reverse() render_index(sorted_list, "Sorted by SHA1 hash, descending", "hash_desc") # Sort by subject, ascending sorted_list = sorted(email_list, key=lambda email: email[5].lower()) render_index(sorted_list, "Sorted by subject, ascending", "subject_asc") # Sort by subject, descending sorted_list.reverse() render_index(sorted_list, "Sorted by subject, descending", "subject_desc") shutil.copy('%s/style.css' % options['template_dir'], '%s/style.css' % options['output_dir']) if originals_available == True: # Copy all original email messages into a subfolder of the output directory try: shutil.copytree(options['original_path'], '%s/original' % options['output_dir']) print "Original e-mail files copied." except OSError: print "ERROR: Could not copy original e-mail files. Ensure the original/ directory does NOT exist in your output directory yet." try: shutil.copytree(options['attachment_dir'], '%s/attachments' % options['output_dir']) print "Attachments copied." except OSError: print "ERROR: Could not copy attachments. Ensure the attachments/ directory does NOT exist in your output directory yet."