#!/usr/bin/python
# Provenance: reconstructed from the "Initial commit" of pythonwhois
# (a4f00914c41eb4f61d5df311b1c9e1f327af4101, Sven Slootweg,
# Fri Sep 21 04:23:06 2012 +0200), which added pythonwhois/__init__.py and a
# small test.py driver. The commit also checked in a compiled
# pythonwhois/__init__.pyc; that build artifact is not reproduced here.
"""Run the system ``whois`` client for a domain and parse its output.

``whois(domain)`` returns a dict of lists of strings (or ``None`` for fields
that were not found); creation and expiration dates are converted to
``datetime.datetime`` objects by ``parse_dates``.
"""

import datetime
import re
import subprocess
import sys

# Parsing rules.
#   "_default"     - one regex per field, used when no TLD-specific set matches.
#   "_fallback"    - lists of alternative regexes, tried only for fields the
#                    primary rule set did not find.
#   "_dateformats" - regexes tried in order by parse_dates().
#   "_months"      - lower-case month name -> month number.
#   any other key  - a regex matched against the domain name; its value is the
#                    rule set for matching domains.
# Field regexes capture their value in the named group ``val``; date regexes
# use ``year``/``month``/``day`` and optionally ``hour``/``minute``/``second``.
grammar = {
    "_default": {
        'domain_name': r'Domain Name:\s?(?P<val>.+)',
        'registrar': r'Registrar:\s?(?P<val>.+)',
        'whois_server': r'Whois Server:\s?(?P<val>.+)',
        'referral_url': r'Referral URL:\s?(?P<val>.+)',
        'updated_date': r'Updated Date:\s?(?P<val>.+)',
        'creation_date': r'Creation Date:\s?(?P<val>.+)',
        'expiration_date': r'Expiration Date:\s?(?P<val>.+)',
        'name_servers': r'Name Server:\s?(?P<val>.+)',
        'status': r'Status:\s?(?P<val>.+)',
    },
    "_fallback": {
        'creation_date': [
            r'Created on:\s?(?P<val>.+)',
            r'Created on\s?[.]*:\s?(?P<val>.+)\.',
            r'Date Registered\s?[.]*:\s?(?P<val>.+)',
            r'Domain Created\s?[.]*:\s?(?P<val>.+)',
        ],
        'expiration_date': [
            r'Expires on:\s?(?P<val>.+)',
            r'Expires on\s?[.]*:\s?(?P<val>.+)\.',
            r'Expiry Date\s?[.]*:\s?(?P<val>.+)',
            r'Domain Currently Expires\s?[.]*:\s?(?P<val>.+)',
        ],
        'registrar': [
            r'Registered through:\s?(?P<val>.+)',
            r'Registrar Name:\s?(?P<val>.+)',
        ],
        'whois_server': [
            r'Registrar Whois:\s?(?P<val>.+)',
        ],
        'name_servers': [
            r'(?P<val>d?ns[0-9]+\.[a-z0-9-]+\.[a-z0-9]+)',
            r'(?P<val>[a-z0-9-]+\.d?ns[0-9]*\.[a-z0-9-]+\.[a-z0-9]+)',
        ],
        'emails': [
            r'(?P<val>[\w.-]+@[\w.-]+\.[\w]{2,4})',
        ],
    },
    "_dateformats": (
        # 21-Sep-2012, optionally followed by a 04:23:06 style time.
        r'(?P<day>[0-9]{1,2})[./ -]'
        r'(?P<month>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -]'
        r'(?P<year>[0-9]{4}|[0-9]{2})'
        r'(\s+(?P<hour>[0-9]{1,2})[:.](?P<minute>[0-9]{1,2})[:.](?P<second>[0-9]{1,2}))?',
        # 2012-09-21
        r'(?P<year>[0-9]{4})[./-](?P<month>[0-9]{1,2})[./-](?P<day>[0-9]{1,2})',
        # Digits without separators.
        # NOTE(review): group order day/month/year is assumed here - confirm.
        r'(?P<day>[0-9]{1,2})(?P<month>[0-9]{1,2})(?P<year>[0-9]{4}|[0-9]{2})',
        # The original tuple ended with two placeholder patterns made only of
        # empty groups; they are omitted because an empty match cannot yield
        # a date.
    ),
    "_months": {
        'jan': 1, 'january': 1,
        'feb': 2, 'february': 2,
        'mar': 3, 'march': 3,
        'apr': 4, 'april': 4,
        'may': 5,
        'jun': 6, 'june': 6,
        'jul': 7, 'july': 7,
        'aug': 8, 'august': 8,
        'sep': 9, 'sept': 9, 'september': 9,
        'oct': 10, 'october': 10,
        'nov': 11, 'november': 11,
        'dec': 12, 'december': 12,
    },
    r".*\.ru$": {
        'domain_name': r'domain:\s*(?P<val>.+)',
        'registrar': r'registrar:\s*(?P<val>.+)',
        'creation_date': r'created:\s*(?P<val>.+)',
        'expiration_date': r'paid-till:\s*(?P<val>.+)',
        'name_servers': r'nserver:\s*(?P<val>.+)',
        'status': r'state:\s*(?P<val>.+)',
    },
}


def _collect_matches(patterns, lines):
    """Return the stripped ``val`` group of every match, in line order."""
    found = []
    for line in lines:
        for pattern in patterns:
            match = re.search(pattern, line, re.IGNORECASE)
            if match is not None:
                found.append(match.group("val").strip())
    return found


def whois(domain):
    """Look up *domain* with the ``whois`` executable and parse the reply.

    Returns a dict keyed by field name. Each value is a list of the strings
    found for that field, or ``None`` when nothing was found. The
    ``creation_date`` and ``expiration_date`` values are lists of
    ``datetime.datetime`` objects instead (``None`` if no date could be
    parsed).

    Raises ``OSError`` if the ``whois`` executable cannot be started. The
    client's stderr output and exit status are ignored.
    """
    # Pick the rule set whose key (a regex) matches the domain; keys starting
    # with "_" are support tables, not domain patterns.
    ruleset = grammar['_default']
    for pattern, rules in grammar.items():
        if not pattern.startswith("_") and re.match(pattern, domain, re.IGNORECASE):
            ruleset = rules
            break

    process = subprocess.Popen(["whois", domain],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, _ = process.communicate()
    if not isinstance(out, str):
        # Python 3 hands back bytes; the regexes below need text.
        out = out.decode("utf-8", "replace")
    lines = out.splitlines()

    data = {}
    for key, pattern in ruleset.items():
        values = _collect_matches([pattern], lines)
        if values:
            data[key] = values

    # Run through fallback detection to gather missing info.
    for key, patterns in grammar['_fallback'].items():
        if key not in data:
            values = _collect_matches(patterns, lines)
            if values:
                data[key] = values

    # Fill all missing values with None so every known field is always
    # present, whichever rule set was used.
    for table in (grammar['_default'], ruleset, grammar['_fallback']):
        for key in table:
            data.setdefault(key, None)

    # Parse dates.
    for key in ('expiration_date', 'creation_date'):
        if data[key] is not None:
            data[key] = parse_dates(data[key]) or None

    return data


def _parse_date(text):
    """Return a datetime for *text*, or None if no date format fits."""
    for pattern in grammar['_dateformats']:
        # Month names arrive in any case; the _months table is lower-case.
        match = re.match(pattern, text, re.IGNORECASE)
        if match is None:
            continue

        fields = match.groupdict()
        year = int(fields['year'])
        if len(fields['year']) == 2:
            # Two-digit years follow the POSIX convention: 69-99 are 19xx,
            # 00-68 are 20xx.
            year += 2000 if year <= 68 else 1900
        day = int(fields['day'])

        # Some WHOIS servers present the name of the month instead of a number.
        month_text = fields['month']
        if month_text.isdigit():
            month = int(month_text)
        else:
            month = grammar['_months'].get(month_text.lower())
            if month is None:
                continue

        # Time groups are missing from some patterns and unmatched (None)
        # when the optional time part is absent; both mean midnight.
        hour = int(fields.get('hour') or 0)
        minute = int(fields.get('minute') or 0)
        second = int(fields.get('second') or 0)

        try:
            return datetime.datetime(year, month, day, hour, minute, second)
        except ValueError:
            # Out-of-range field (e.g. month 13); let the next format try.
            continue

    return None


def parse_dates(dates):
    """Convert an iterable of date strings to ``datetime.datetime`` objects.

    Each string is matched against ``grammar['_dateformats']`` in order.
    Strings that fit no format, or that describe an impossible date, are
    skipped, so the result may be shorter than the input.
    """
    parsed_dates = []
    for text in dates:
        moment = _parse_date(text)
        if moment is not None:
            parsed_dates.append(moment)
    return parsed_dates


def _main(argv):
    """Command-line driver (formerly test.py): print the dates for argv[1]."""
    result = whois(argv[1])
    sys.stdout.write("Creation date:  %s\n" % (result['creation_date'],))
    sys.stdout.write("Expiration date:  %s\n" % (result['expiration_date'],))


if __name__ == "__main__":
    _main(sys.argv)