2012-05-27 12:15:04 +02:00
#!/usr/bin/python
2012-05-28 04:16:34 +02:00
import os, argparse, hashlib, sqlite3, time, shutil, cgi, re, math
2012-05-27 14:28:05 +02:00
from datetime import datetime
2012-05-27 12:15:04 +02:00
# Command-line interface. Every option has a default, so the script can be run
# without arguments from a directory laid out with those default names.
parser = argparse.ArgumentParser(description='Renders static HTML pages and indexes from an SQLite database of emails and an attachment folder.')
parser.add_argument('-o', dest='output_dir', action='store', default='rendered_files',
                    help='path of the directory where rendered files should be stored')
parser.add_argument('-t', dest='template_dir', action='store', default='templates',
                    help='path where the template files are')
parser.add_argument('-i', dest='title', action='store', default='Inbox',
                    help='title for the rendered pages')
parser.add_argument('-d', dest='database', action='store', default='emails.db',
                    help='path of the database that should be used to render the e-mail files')
parser.add_argument('-a', dest='attachment_dir', action='store', default='attachments',
                    help='path where attachments are stored')
# Converted by argparse so that a non-numeric value is rejected up front with
# a usage message instead of failing later while the indexes are rendered.
parser.add_argument('-n', dest='per_page', action='store', type=int, default=20,
                    help='amount of entries per page')
parser.add_argument('-r', dest='original_path', action='store', default=None,
                    help='path where original .eml files can be found')

args = parser.parse_args()
options = vars(args)

# Index pages are built by splitting the message list into groups of this
# size, so it has to be at least 1.
if options['per_page'] < 1:
    parser.error('the amount of entries per page (-n) must be at least 1')

# Original .eml files are only linked and copied when -r was given.
originals_available = options['original_path'] is not None
2012-05-27 15:00:00 +02:00
def format_size(num):
    """Format a byte count as a human-readable string with one decimal.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, e.g. 1536 -> "1.5KB" and 10 -> "10.0 bytes".
    """
    units = (' bytes', 'KB', 'MB', 'GB')
    position = 0
    while position < len(units):
        if num < 1024.0:
            return "%3.1f%s" % (num, units[position])
        num = num / 1024.0
        position += 1
    # Anything that is still >= 1024 GB is reported in terabytes.
    return "%3.1f%s" % (num, 'TB')
2012-05-28 04:16:34 +02:00
def chunk(iterable, chunksize, fillvalue=None):
    """Split a sequence into consecutive slices of at most ``chunksize`` items.

    Arguments:
        iterable: a sequence supporting ``len()`` and slicing (list, tuple,
            string, ...).
        chunksize: maximum number of items per chunk; must be at least 1.
        fillvalue: accepted for interface compatibility only. It is not used:
            the last chunk is simply shorter when the length of the sequence
            is not a multiple of ``chunksize``.

    Returns:
        A list of slices of ``iterable``, in order. An empty sequence yields
        an empty list.

    Raises:
        ValueError: if ``chunksize`` is smaller than 1.
    """
    if chunksize < 1:
        raise ValueError("chunksize must be at least 1, got %r" % (chunksize,))
    # Slicing clamps the end index, so the final (possibly shorter) chunk
    # needs no special handling.
    return [iterable[start:start + chunksize]
            for start in range(0, len(iterable), chunksize)]
2012-05-28 10:29:56 +02:00
def is_selected(i, page):
    """Return the HTML attribute "selected" when ``i`` equals ``page``, else ""."""
    return "selected" if i == page else ""
2012-05-28 04:16:34 +02:00
def render_index(email_list, title, identifier):
    """Write the paginated index pages for one ordering of the messages.

    The list is split into pages of ``options['per_page']`` entries and each
    page is rendered through ``template_index`` into
    ``<output_dir>/index_<identifier>_<page>.html`` (pages are numbered from
    0 in the file names). Nothing is written for an empty list.

    Arguments:
        email_list: already-sorted sequence of message tuples laid out as
            (sha1_hash, available_text, available_html, sender, recipient,
            subject, timestamp, attachment count, snippet, unixtime,
            page_name).
        title: human-readable description of the ordering, shown on the page.
        identifier: short name of the ordering, used in the file names.
    """
    pages = chunk(email_list, int(options['per_page']))
    page_count = len(pages)

    for current_page, list_chunk in enumerate(pages):
        # No "previous" link on the first page and no "next" link on the last.
        if current_page == 0:
            prevpage = " "
        else:
            prevpage = '<a href="index_%s_%d.html"><< previous page</a>' % (identifier, current_page - 1)
        if current_page == page_count - 1:
            nextpage = ""
        else:
            nextpage = '<a href="index_%s_%d.html">next page >></a>' % (identifier, current_page + 1)

        # One table row per message: shortened hash, sender, recipient,
        # subject (linked to the message page), date and attachment count.
        rows = "".join(
            '<tr class="clickable" data-url="messages/%s"><td>%s...</td><td>%s</td><td>%s</td><td><a href="messages/%s">%s</a></td><td>%s</td><td>%s</td></tr>' % (message[10], message[0][:8], message[3], message[4], message[10], message[5], message[6], message[7])
            for message in list_chunk)

        # Entries for the page selector; the current page is pre-selected.
        page_options = "".join(
            '<option value="index_%s_%d.html" %s>Page %d</option>' % (identifier, i, is_selected(i, current_page), i + 1)
            for i in range(page_count))

        variables = {
            'page': title,
            'pagenum': "Page %d of %d" % (current_page + 1, page_count),
            'title': options['title'],
            'index': "../index.html",
            'items': rows,
            'prev': prevpage,
            'next': nextpage,
            'pages': page_options
        }

        filename = "%s/index_%s_%d.html" % (options['output_dir'], identifier, current_page)
        generated = template_index % variables
        # The page is encoded explicitly, so it is written in binary mode;
        # the with-block makes sure the handle is closed again.
        with open(filename, 'wb') as output_file:
            output_file.write(generated.encode('UTF-8'))
        print("Wrote page %d for %s." % (current_page, identifier))
2012-05-27 15:00:00 +02:00
2012-05-27 12:37:00 +02:00
# Refuse to start without a database; sqlite3.connect() would otherwise
# silently create a new, empty one.
if not os.path.isfile(options['database']):
    print("Database file not found. Use the -d switch to specify a custom database path.")
    raise SystemExit(1)

# Connect to database
database = sqlite3.connect(options['database'])
cursor = database.cursor()

# Load templates (the with-blocks close the files again after reading)
with open('%s/message.html' % options['template_dir']) as template_file:
    template_message = template_file.read()
with open('%s/index.html' % options['template_dir']) as template_file:
    template_index = template_file.read()

# Create output directory if necessary
messages_dir = "%s/messages" % options['output_dir']
try:
    os.makedirs(messages_dir)
except OSError:
    # An already existing directory is fine; anything else (e.g. missing
    # permissions) would make every later write fail, so report it now.
    if not os.path.isdir(messages_dir):
        raise
2012-05-27 15:28:24 +02:00
# Render every message to its own page(s) and collect one summary tuple per
# message in email_list for the index pages.
email_list = []
for message_id, sender, recipient, subject, unixtime, textbody, htmlbody, sha1_hash in cursor.execute("SELECT * FROM emails"):

    # Header fields end up in HTML text and attribute positions.
    sender = cgi.escape(sender, True)
    recipient = cgi.escape(recipient, True)
    subject = cgi.escape(subject, True)
    message_id = cgi.escape(message_id, True)
    timestamp = datetime.fromtimestamp(unixtime).strftime("%A %B %e, %Y %H:%M:%S")

    # Attachments are looked up with a separate cursor so that the outer
    # query, which is still being iterated, is left untouched.
    attachment_list = []
    attachment_cursor = database.cursor()
    for message_hash, attachment_filename, attachment_type, attachment_hash, attachment_size in attachment_cursor.execute("SELECT * FROM attachments WHERE `message_hash` = ?", (sha1_hash,)):
        # Links point at "<hash><original extension>" in the attachments folder.
        attachment_extension = os.path.splitext(attachment_filename)[1]
        attachment_file = "%s%s" % (attachment_hash, attachment_extension)
        # File name and type come from the e-mail itself, so they are escaped
        # like the header fields before being placed in the page.
        attachment_label = "%s (%s, %s)" % (attachment_filename, attachment_type, format_size(attachment_size))
        attachment_list.append('<a href="../attachments/%s" class="attachment">%s</a>' % (cgi.escape(attachment_file, True), cgi.escape(attachment_label, True)))
    if len(attachment_list) > 0:
        attachments = "".join(attachment_list)
        print("ATTACHMENTS %s" % sha1_hash)
    else:
        attachments = '<div class="attachment light">No attachments.</div>'

    # A body that is empty or NULL counts as "not available".
    available_text = bool(textbody)
    available_html = bool(htmlbody)

    # Links to the other renderings of this message, in a fixed order.
    versions = []
    if available_text:
        versions.append(('Plaintext', '%s_text.html' % sha1_hash))
    if available_html:
        versions.append(('HTML', '%s_html.html' % sha1_hash))
    if originals_available:
        versions.append(('Original', '../original/%s.eml' % sha1_hash))
    version_list = "".join('<a href="%s" class="version">%s</a>' % (target, label) for label, target in versions)

    # Template values shared by the plaintext and the HTML page.
    shared_variables = {
        'subject': subject,
        'date': timestamp,
        'from': sender,
        'to': recipient,
        'title': options['title'],
        'index': "../index.html",
        'versions': version_list,
        'attachments': attachments
    }

    if available_text:
        # Text version. The body is escaped so that "<" and "&" in the text
        # are displayed literally instead of being parsed as markup
        # (assumes the database holds the raw text, as it does for the
        # header fields escaped above).
        variables = dict(shared_variables, body="<pre>%s</pre>" % cgi.escape(textbody), version="Plaintext version")
        generated = template_message % variables
        with open('%s/messages/%s_text.html' % (options['output_dir'], sha1_hash), 'wb') as output_file:
            output_file.write(generated.encode('UTF-8'))
        print("Successfully generated plaintext version of %s." % sha1_hash)

    if available_html:
        # HTML version: the body is inserted as-is.
        variables = dict(shared_variables, body=htmlbody, version="HTML version")
        generated = template_message % variables
        with open('%s/messages/%s_html.html' % (options['output_dir'], sha1_hash), 'wb') as output_file:
            output_file.write(generated.encode('UTF-8'))
        print("Successfully generated HTML version of %s." % sha1_hash)

    # Up to 200 leading characters of the text body, cut at a word boundary.
    snippet_match = re.search("^(.{0,200})\\b", textbody or "", re.DOTALL)
    if snippet_match:
        snippet = "%s..." % snippet_match.group(1)
    else:
        snippet = ""

    # The index links to the HTML page when there is one, otherwise to the
    # plaintext page. Messages without any body are left out of the index.
    if available_html:
        page_name = "%s_html.html" % sha1_hash
    elif available_text:
        page_name = "%s_text.html" % sha1_hash
    else:
        continue
    email_list.append((sha1_hash, available_text, available_html, sender, recipient, subject, timestamp, len(attachment_list), snippet, unixtime, page_name))
2012-05-27 15:28:24 +02:00
# Index orderings, rendered in this sequence. Each entry holds the position of
# the sort field inside a message tuple, whether the field is compared
# case-insensitively, and the (identifier, title) pairs of the ascending and
# the descending index.
index_orderings = [
    (9, False, ("date_asc", "Sorted from old to new"), ("date_desc", "Sorted from new to old")),
    (3, True, ("from_asc", "Sorted by sender, ascending"), ("from_desc", "Sorted by sender, descending")),
    (4, True, ("to_asc", "Sorted by recipient, ascending"), ("to_desc", "Sorted by recipient, descending")),
    (0, True, ("hash_asc", "Sorted by SHA1 hash, ascending"), ("hash_desc", "Sorted by SHA1 hash, descending")),
    (5, True, ("subject_asc", "Sorted by subject, ascending"), ("subject_desc", "Sorted by subject, descending")),
]

for sort_field, ignore_case, ascending, descending in index_orderings:
    if ignore_case:
        sorted_list = sorted(email_list, key=lambda email: email[sort_field].lower())
    else:
        sorted_list = sorted(email_list, key=lambda email: email[sort_field])
    render_index(sorted_list, ascending[1], ascending[0])
    # The descending index is the ascending list read backwards.
    sorted_list.reverse()
    render_index(sorted_list, descending[1], descending[0])
2012-05-27 15:28:24 +02:00
2012-05-27 13:46:05 +02:00
# Copy the static files that live next to the templates into the output directory.
for asset_name in ('style.css', 'up.png', 'down.png'):
    shutil.copy('%s/%s' % (options['template_dir'], asset_name), '%s/%s' % (options['output_dir'], asset_name))

# copytree() raises OSError when the target directory already exists and
# shutil.Error when individual files could not be copied. Both are reported
# together with the underlying reason and the script carries on, so that one
# failed copy does not prevent the other.
if originals_available:
    # Copy all original email messages into a subfolder of the output directory
    try:
        shutil.copytree(options['original_path'], '%s/original' % options['output_dir'])
    except (OSError, shutil.Error) as copy_error:
        print("ERROR: Could not copy original e-mail files. Ensure the original/ directory does NOT exist in your output directory yet. (%s)" % copy_error)
    else:
        print("Original e-mail files copied.")

try:
    shutil.copytree(options['attachment_dir'], '%s/attachments' % options['output_dir'])
except (OSError, shutil.Error) as copy_error:
    print("ERROR: Could not copy attachments. Ensure the attachments/ directory does NOT exist in your output directory yet. (%s)" % copy_error)
else:
    print("Attachments copied.")