Automatically migrated from Gitolite
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

278 lines
10 KiB

9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
  1. #!/usr/bin/python
  2. import os, argparse, hashlib, sqlite3, time, shutil, cgi, re, math
  3. from datetime import datetime
  4. parser = argparse.ArgumentParser(description='Renders static HTML pages and indexes from an SQLite database of emails and an attachment folder.')
  5. parser.add_argument('-o', dest='output_dir', action='store', default='rendered_files',
  6. help='path of the directory where rendered files should be stored')
  7. parser.add_argument('-t', dest='template_dir', action='store', default='templates',
  8. help='path where the template files are')
  9. parser.add_argument('-i', dest='title', action='store', default='Inbox',
  10. help='title for the rendered pages')
  11. parser.add_argument('-d', dest='database', action='store', default='emails.db',
  12. help='path of the database that should be used to render the e-mail files')
  13. parser.add_argument('-a', dest='attachment_dir', action='store', default='attachments',
  14. help='path where attachments are stored')
  15. parser.add_argument('-n', dest='per_page', action='store', default='20',
  16. help='amount of entries per page')
  17. parser.add_argument('-r', dest='original_path', action='store', default=None,
  18. help='path where original .eml files can be found')
  19. parser.add_argument('-x', dest='index', action='store', default='date_desc',
  20. help='the sorting state you wish to use as initial page')
  21. args = parser.parse_args()
  22. options = vars(args)
  23. if options['original_path'] is not None:
  24. originals_available = True
  25. else:
  26. originals_available = False
  27. def format_size(num):
  28. for unit in [' bytes','KB','MB','GB']:
  29. if num < 1024.0:
  30. return "%3.1f%s" % (num, unit)
  31. num /= 1024.0
  32. return "%3.1f%s" % (num, 'TB')
  33. def chunk(iterable, chunksize, fillvalue=None):
  34. result = []
  35. num_chunks = int(math.ceil(len(iterable) / (chunksize * 1.0)))
  36. for i in xrange(0, num_chunks):
  37. start = (chunksize * i)
  38. if (chunksize * (i + 1)) > len(iterable):
  39. end = len(iterable)
  40. else:
  41. end = (chunksize * (i + 1))
  42. result.append((iterable[start:end]))
  43. return result
  44. def is_selected(i, page):
  45. if i == page:
  46. return "selected"
  47. else:
  48. return ""
  49. def render_index(email_list, title, identifier):
  50. email_list = chunk(email_list, int(options['per_page']))
  51. current_page = 0
  52. for list_chunk in email_list:
  53. if current_page == 0:
  54. prevpage = "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
  55. else:
  56. prevpage = '<a href="index_%s_%d.html">&lt;&lt; previous page</a>' % (identifier, current_page - 1)
  57. if current_page == len(email_list) - 1:
  58. nextpage = ""
  59. else:
  60. nextpage = '<a href="index_%s_%d.html">next page &gt;&gt;</a>' % (identifier, current_page + 1)
  61. variables = {
  62. 'page': title,
  63. 'pagenum': "Page %d of %d" % (current_page + 1, len(email_list)),
  64. 'title': options['title'],
  65. 'index': "index.html",
  66. 'items': "".join('<tr class="clickable" data-url="messages/%s"><td>%s...</td><td>%s</td><td>%s</td><td><a href="messages/%s">%s</a></td><td>%s</td><td>%s</td></tr>' % (message[10], message[0][:8], message[3], message[4], message[10], message[5], message[6], message[7]) for message in list_chunk),
  67. 'prev': prevpage,
  68. 'next': nextpage,
  69. 'pages': "".join('<option value="index_%s_%d.html" %s>Page %d</option>' % (identifier, i, is_selected(i, current_page), i + 1) for i in xrange(0, len(email_list)))
  70. }
  71. filename = "%s/index_%s_%d.html" % (options['output_dir'], identifier, current_page)
  72. generated = template_index % variables
  73. open(filename, 'w').write(generated.encode('UTF-8'))
  74. print "Wrote page %d for %s." % (current_page, identifier)
  75. current_page += 1
  76. if os.path.isfile(options['database']) == False:
  77. print "Database file not found. Use the -d switch to specify a custom database path."
  78. exit(1)
  79. # Connect to database
  80. database = sqlite3.connect(options['database'])
  81. cursor = database.cursor()
  82. # Load templates
  83. template_message = open('%s/message.html' % options['template_dir']).read()
  84. template_index = open('%s/index.html' % options['template_dir']).read()
  85. # Create output directory if necessary
  86. try:
  87. os.makedirs("%s/messages" % options['output_dir'])
  88. except OSError:
  89. pass
  90. email_list = []
  91. for message_id, sender, recipient, subject, unixtime, textbody, htmlbody, sha1_hash in cursor.execute("SELECT * FROM emails"):
  92. sender = cgi.escape(sender, True)
  93. recipient = cgi.escape(recipient, True)
  94. subject = cgi.escape(subject, True)
  95. message_id = cgi.escape(message_id, True)
  96. timestamp = datetime.fromtimestamp(unixtime).strftime("%A %B %e, %Y %H:%M:%S")
  97. attachment_list = []
  98. attachment_cursor = database.cursor()
  99. for message_hash, attachment_filename, attachment_type, attachment_hash, attachment_size in attachment_cursor.execute("SELECT * FROM attachments WHERE `message_hash` = ?", (sha1_hash,)):
  100. attachment_extension = os.path.splitext(attachment_filename)[1]
  101. attachment_file = "%s%s" % (attachment_hash, attachment_extension)
  102. attachment_list.append('<a href="../attachments/%s" class="attachment">%s (%s, %s)</a>' % (attachment_file, attachment_filename, attachment_type, format_size(attachment_size)))
  103. if len(attachment_list) > 0:
  104. attachments = "".join(attachment_list)
  105. print "ATTACHMENTS %s" % sha1_hash
  106. else:
  107. attachments = '<div class="attachment light">No attachments.</div>'
  108. versions = {}
  109. if textbody != "":
  110. available_text = True
  111. versions['Plaintext'] = '%s_text.html' % sha1_hash
  112. else:
  113. available_text = False
  114. if htmlbody != "":
  115. available_html = True
  116. versions['HTML'] = '%s_html.html' % sha1_hash
  117. else:
  118. available_html = False
  119. if originals_available:
  120. versions['Original'] = '../original/%s.eml' % sha1_hash
  121. try:
  122. version_list = "".join('<a href="%s" class="version">%s</a>' % (value, key) for key, value in versions.viewitems())
  123. except AttributeError:
  124. print "The dict.viewitems() function is not available on your system; you are most likely using a Python version older than 2.7."
  125. exit(1)
  126. if available_text == True:
  127. # Text version
  128. variables = {
  129. 'subject': subject,
  130. 'date': timestamp,
  131. 'from': sender,
  132. 'to': recipient,
  133. 'body': "<pre>%s</pre>" % textbody,
  134. 'title': options['title'],
  135. 'version': "Plaintext version",
  136. 'index': "../index.html",
  137. 'versions': version_list,
  138. 'attachments': attachments
  139. }
  140. generated = template_message % variables
  141. open('%s/messages/%s_text.html' % (options['output_dir'], sha1_hash), 'w').write(generated.encode('UTF-8'))
  142. print "Successfully generated plaintext version of %s." % sha1_hash
  143. if available_html == True:
  144. # HTML version
  145. variables = {
  146. 'subject': subject,
  147. 'date': timestamp,
  148. 'from': sender,
  149. 'to': recipient,
  150. 'body': htmlbody,
  151. 'title': options['title'],
  152. 'version': "HTML version",
  153. 'index': "../index.html",
  154. 'versions': version_list,
  155. 'attachments': attachments
  156. }
  157. generated = template_message % variables
  158. open('%s/messages/%s_html.html' % (options['output_dir'], sha1_hash), 'w').write(generated.encode('UTF-8'))
  159. print "Successfully generated HTML version of %s." % sha1_hash
  160. try:
  161. snippet = "%s..." % re.search("^(.{0,200})\\b", textbody, re.DOTALL).group(1)
  162. except AttributeError:
  163. snippet = ""
  164. if available_html == True:
  165. page_name = "%s_html.html" % sha1_hash
  166. elif available_text == True:
  167. page_name = "%s_text.html" % sha1_hash
  168. else:
  169. continue
  170. email_list.append((sha1_hash, available_text, available_html, sender, recipient, subject, timestamp, len(attachment_list), snippet, unixtime, page_name))
  171. # Sort by timestamp, ascending
  172. sorted_list = sorted(email_list, key=lambda email: email[9])
  173. render_index(sorted_list, "Sorted from old to new", "date_asc")
  174. # Sort by timestamp, descending
  175. sorted_list.reverse()
  176. render_index(sorted_list, "Sorted from new to old", "date_desc")
  177. # Sort by sender, ascending
  178. sorted_list = sorted(email_list, key=lambda email: email[3].lower())
  179. render_index(sorted_list, "Sorted by sender, ascending", "from_asc")
  180. # Sort by sender, descending
  181. sorted_list.reverse()
  182. render_index(sorted_list, "Sorted by sender, descending", "from_desc")
  183. # Sort by recipient, ascending
  184. sorted_list = sorted(email_list, key=lambda email: email[4].lower())
  185. render_index(sorted_list, "Sorted by recipient, ascending", "to_asc")
  186. # Sort by recipient, descending
  187. sorted_list.reverse()
  188. render_index(sorted_list, "Sorted by recipient, descending", "to_desc")
  189. # Sort by hash, ascending
  190. sorted_list = sorted(email_list, key=lambda email: email[0].lower())
  191. render_index(sorted_list, "Sorted by SHA1 hash, ascending", "hash_asc")
  192. # Sort by hash, descending
  193. sorted_list.reverse()
  194. render_index(sorted_list, "Sorted by SHA1 hash, descending", "hash_desc")
  195. # Sort by subject, ascending
  196. sorted_list = sorted(email_list, key=lambda email: email[5].lower())
  197. render_index(sorted_list, "Sorted by subject, ascending", "subject_asc")
  198. # Sort by subject, descending
  199. sorted_list.reverse()
  200. render_index(sorted_list, "Sorted by subject, descending", "subject_desc")
  201. shutil.copy('%s/style.css' % options['template_dir'], '%s/style.css' % options['output_dir'])
  202. shutil.copy('%s/up.png' % options['template_dir'], '%s/up.png' % options['output_dir'])
  203. shutil.copy('%s/down.png' % options['template_dir'], '%s/down.png' % options['output_dir'])
  204. shutil.copy('%s/index_%s_0.html' % (options['output_dir'], options['index']), '%s/index.html' % options['output_dir'])
  205. if originals_available == True:
  206. # Copy all original email messages into a subfolder of the output directory
  207. try:
  208. shutil.copytree(options['original_path'], '%s/original' % options['output_dir'])
  209. print "Original e-mail files copied."
  210. except OSError:
  211. print "ERROR: Could not copy original e-mail files. Ensure the original/ directory does NOT exist in your output directory yet."
  212. try:
  213. shutil.copytree(options['attachment_dir'], '%s/attachments' % options['output_dir'])
  214. print "Attachments copied."
  215. except OSError:
  216. print "ERROR: Could not copy attachments. Ensure the attachments/ directory does NOT exist in your output directory yet."