Implemented actual parsing plus threshold and ignored extensions

master
Sven Slootweg 13 years ago
parent 6156db5d46
commit 98d27206ad

@ -1,9 +1,105 @@
import re, argparse, os
def show_sorted(dictionary):
for entry in sorted(dictionary, key=dictionary.get, reverse=True):
if options.has_key('minimum') == False or dictionary[entry] > int(options['minimum']):
print str(dictionary[entry]).rjust(total_digits), entry
print ""
parser = argparse.ArgumentParser(description='Parse a lighttpd access log.')
parser.add_argument('logfile', metavar='logfile', type=str, nargs='+',
parser.add_argument('logfiles', metavar='logfile', type=str, nargs='+',
help='path(s) of the logfile(s)')
parser.add_argument('-e', '--extensions', dest='extensions', action='store',
help='specify a comma-separated list of extensions to ignore during parsing')
parser.add_argument('-m', '--minimum', dest='minimum', action='store',
help='the counting threshold that has to be exceeded to display the entry')
args = parser.parse_args()
options = vars(args)
referers = {}
days = {}
hosts = {}
files = {}
urls = {}
extensions = {}
total_digits = 10
try:
ignore_extensions = options['extensions'].split(',')
except AttributeError:
ignore_extensions = []
for logpath in options['logfiles']:
try:
log = open(logpath, 'r')
for line in log:
ip, hostname, dash, datetime, timezone, method, uri, version, status, size, referer, useragent = line.split(' ', 11)
datetime = datetime[1:]
date = datetime.split(':')[0]
timezone = timezone[:-1]
method = method[1:]
version = version[:-1]
useragent = useragent[1:-2]
referer = referer[1:-1]
filename = uri.split('?')[0]
extension = os.path.splitext(filename)[1][1:]
if extension not in ignore_extensions:
if hostname not in hosts:
hosts[hostname] = 0
if referer not in referers:
referers[referer] = 0
if date not in days:
days[date] = 0
if filename not in files:
files[filename] = 0
if uri not in urls:
urls[uri] = 0
if extension not in extensions:
extensions[extension] = 0
hosts[hostname] += 1
referers[referer] += 1
days[date] += 1
files[filename] += 1
urls[uri] += 1
extensions[extension] += 1
print "Top days:"
show_sorted(days)
print ""
print "Top requested hostnames:"
show_sorted(hosts)
print ""
print "Top files:"
show_sorted(files)
print ""
print "Top extensions:"
show_sorted(extensions)
print ""
print "Top referers:"
show_sorted(referers)
print ""
print "Top URLs:"
show_sorted(urls)
print ""
except IOError:
print "Could not find file %s, ignored entry." % logpath

Loading…
Cancel
Save