Add hashbang, catch exceptions, and output end results.

master
Sven Slootweg 13 years ago
parent 98d27206ad
commit 99e76ea6ae

@ -1,3 +1,5 @@
#!/usr/bin/python
import re, argparse, os import re, argparse, os
def show_sorted(dictionary): def show_sorted(dictionary):
@ -28,6 +30,7 @@ urls = {}
extensions = {} extensions = {}
total_digits = 10 total_digits = 10
current_lines = 0
try: try:
ignore_extensions = options['extensions'].split(',') ignore_extensions = options['extensions'].split(',')
@ -39,67 +42,73 @@ for logpath in options['logfiles']:
log = open(logpath, 'r') log = open(logpath, 'r')
for line in log: for line in log:
ip, hostname, dash, datetime, timezone, method, uri, version, status, size, referer, useragent = line.split(' ', 11) try:
datetime = datetime[1:] ip, hostname, dash, datetime, timezone, method, uri, version, status, size, referer, useragent = line.split(' ', 11)
date = datetime.split(':')[0] datetime = datetime[1:]
timezone = timezone[:-1] date = datetime.split(':')[0]
method = method[1:] timezone = timezone[:-1]
version = version[:-1] method = method[1:]
useragent = useragent[1:-2] version = version[:-1]
referer = referer[1:-1] useragent = useragent[1:-2]
filename = uri.split('?')[0] referer = referer[1:-1]
extension = os.path.splitext(filename)[1][1:] filename = uri.split('?')[0]
extension = os.path.splitext(filename)[1][1:]
if extension not in ignore_extensions:
if hostname not in hosts: if extension not in ignore_extensions:
hosts[hostname] = 0 if hostname not in hosts:
hosts[hostname] = 0
if referer not in referers:
referers[referer] = 0 if referer not in referers:
referers[referer] = 0
if date not in days:
days[date] = 0 if date not in days:
days[date] = 0
if filename not in files:
files[filename] = 0 if filename not in files:
files[filename] = 0
if uri not in urls:
urls[uri] = 0 if uri not in urls:
urls[uri] = 0
if extension not in extensions:
extensions[extension] = 0 if extension not in extensions:
extensions[extension] = 0
hosts[hostname] += 1
referers[referer] += 1 hosts[hostname] += 1
days[date] += 1 referers[referer] += 1
files[filename] += 1 days[date] += 1
urls[uri] += 1 files[filename] += 1
extensions[extension] += 1 urls[uri] += 1
extensions[extension] += 1
print "Top days:" except ValueError:
show_sorted(days) print "Corrupt log line at line %d, contents: %s" % (current_lines + 1, line[:-1])
print ""
current_lines += 1
print "Top requested hostnames:"
show_sorted(hosts) if current_lines % 1000 == 0:
print "" print "Processed %d lines." % current_lines
except IOError:
print "Could not find file %s, ignored entry." % logpath
print "Top files:" print "Top days:"
show_sorted(files) show_sorted(days)
print "" print ""
print "Top extensions:" print "Top requested hostnames:"
show_sorted(extensions) show_sorted(hosts)
print "" print ""
print "Top referers:" print "Top files:"
show_sorted(referers) show_sorted(files)
print "" print ""
print "Top URLs:" print "Top extensions:"
show_sorted(urls) show_sorted(extensions)
print "" print ""
print "Top referers:"
show_sorted(referers)
print ""
except IOError: print "Top URLs:"
print "Could not find file %s, ignored entry." % logpath show_sorted(urls)
print ""

Loading…
Cancel
Save