#!/usr/bin/python
"""Parse WHOIS records obtained from the ``jwhois`` command-line client.

``whois(domain)`` runs ``jwhois``, extracts fields from its output with the
regular expressions in ``grammar`` and returns the raw text together with a
dict of parsed values.  The code is written to run on Python 2.6+ and 3.
"""

import datetime
import re
import subprocess

# Month-name alternation shared by the textual date layouts below.
_MONTH_ABBR = r'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec'
# Optional/required "HH:MM:SS" tail used by several date layouts.
_TIME = (r'(?P<hour>[0-9]{1,2})[:.](?P<minute>[0-9]{1,2})'
         r'[:.](?P<second>[0-9]{1,2})')

# Keys starting with "_" are special tables; every other key is a regular
# expression matched against the domain name to select a TLD-specific ruleset.
# Field patterns expose their payload through a group named "val", which is
# what whois() reads; date patterns expose "year", "month", "day" and
# optionally "hour", "minute", "second", which is what parse_dates() reads.
grammar = {
    "_default": {
        'domain_name': r'Domain Name:\s?(?P<val>.+)',
        'registrar': r'Registrar:\s?(?P<val>.+)',
        'whois_server': r'Whois Server:\s?(?P<val>.+)',
        'referral_url': r'Referral URL:\s?(?P<val>.+)',
        'updated_date': r'Updated Date:\s?(?P<val>.+)',
        'creation_date': r'Creation Date:\s?(?P<val>.+)',
        'expiration_date': r'Expiration Date:\s?(?P<val>.+)',
        'name_servers': r'Name Server:\s?(?P<val>.+)',
        'status': r'Status:\s?(?P<val>.+)',
    },
    # Tried, per field, only when the selected ruleset found nothing.
    "_fallback": {
        'creation_date': [
            r'Created on:\s?(?P<val>.+)',
            r'Created on\s?[.]*:\s?(?P<val>.+)\.',
            r'Date Registered\s?[.]*:\s?(?P<val>.+)',
            r'Domain Created\s?[.]*:\s?(?P<val>.+)',
            r'Domain registered\s?[.]*:\s?(?P<val>.+)',
            r'Domain record activated\s?[.]*:\s*?(?P<val>.+)',
            r'Record created on\s?[.]*:?\s*?(?P<val>.+)',
            r'Record created\s?[.]*:?\s*?(?P<val>.+)',
            r'Created\s?[.]*:?\s*?(?P<val>.+)',
            r'Registered on\s?[.]*:?\s*?(?P<val>.+)',
            r'Registered\s?[.]*:?\s*?(?P<val>.+)',
            r'Domain Create Date\s?[.]*:?\s*?(?P<val>.+)',
            r'Domain Registration Date\s?[.]*:?\s*?(?P<val>.+)',
        ],
        'expiration_date': [
            r'Expires on:\s?(?P<val>.+)',
            r'Expires on\s?[.]*:\s?(?P<val>.+)\.',
            r'Expiry Date\s?[.]*:\s?(?P<val>.+)',
            r'Domain Currently Expires\s?[.]*:\s?(?P<val>.+)',
            r'Record will expire on\s?[.]*:\s?(?P<val>.+)',
            r'Domain expires\s?[.]*:\s*?(?P<val>.+)',
            r'Record expires on\s?[.]*:?\s*?(?P<val>.+)',
            r'Record expires\s?[.]*:?\s*?(?P<val>.+)',
            r'Expires\s?[.]*:?\s*?(?P<val>.+)',
            r'Expire Date\s?[.]*:?\s*?(?P<val>.+)',
            r'Expired\s?[.]*:?\s*?(?P<val>.+)',
            r'Domain Expiration Date\s?[.]*:?\s*?(?P<val>.+)',
        ],
        'updated_date': [
            r'Database last updated on\s?[.]*:?\s*?(?P<val>.+)\s[a-z]+\.?',
            r'Record last updated on\s?[.]*:?\s?(?P<val>.+)\.',
            r'Domain record last updated\s?[.]*:\s*?(?P<val>.+)',
            r'Domain Last Updated\s?[.]*:\s*?(?P<val>.+)',
            r'Last updated on:\s?(?P<val>.+)',
            r'Date Modified\s?[.]*:\s?(?P<val>.+)',
            r'Last Modified\s?[.]*:\s?(?P<val>.+)',
            r'Domain Last Updated Date\s?[.]*:\s?(?P<val>.+)',
            r'Record last updated\s?[.]*:\s?(?P<val>.+)',
            r'Modified\s?[.]*:\s?(?P<val>.+)',
            r'Last Update\s?[.]*:\s?(?P<val>.+)',
            r'Last update of whois database:\s?[a-z]{3}, (?P<val>.+) [a-z]{3}',
        ],
        'registrar': [
            r'Registered through:\s?(?P<val>.+)',
            r'Registrar Name:\s?(?P<val>.+)',
            r'Record maintained by:\s?(?P<val>.+)',
            r'Registration Service Provided By:\s?(?P<val>.+)',
            r'Registrar of Record:\s?(?P<val>.+)',
            r'\tName:\t\s(?P<val>.+)',
        ],
        'whois_server': [
            r'Registrar Whois:\s?(?P<val>.+)',
        ],
        'name_servers': [
            r'(?P<val>[a-z]*d?ns[0-9]+([a-z]{3})?\.([a-z0-9-]+\.)+[a-z0-9]+)',
            r'(?P<val>[a-z0-9-]+\.d?ns[0-9]*\.([a-z0-9-]+\.)+[a-z0-9]+)',
            r'(?P<val>([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})',
            r'DNS[0-9]+:\s*(?P<val>.+)',
            r'ns[0-9]+:\s*(?P<val>.+)',
            r'[^a-z0-9.-](?P<val>d?ns\.([a-z0-9-]+\.)+[a-z0-9]+)',
        ],
        'emails': [
            r'(?P<val>[\w.-]+@[\w.-]+\.[\w]{2,4})',
            r'(?P<val>[\w.-]+\sAT\s[\w.-]+\sDOT\s[\w]{2,4})',
        ],
    },
    # Tried in order by parse_dates(); the first layout that matches wins.
    "_dateformats": (
        # 15-Jan-2010 [10:20:30]
        r'(?P<day>[0-9]{1,2})[./ -](?P<month>' + _MONTH_ABBR + r')[./ -]'
        r'(?P<year>[0-9]{4}|[0-9]{2})'
        r'(\s+' + _TIME + r')?',
        # Mon Jan 05 [12:30:45] GMT 2010
        r'[a-z]{3}\s(?P<month>' + _MONTH_ABBR + r')[./ -](?P<day>[0-9]{1,2})'
        r'(\s+' + _TIME + r')?'
        r'\s[a-z]{3}\s(?P<year>[0-9]{4}|[0-9]{2})',
        # 2010-01-15
        r'(?P<year>[0-9]{4})[./-](?P<month>[0-9]{1,2})[./-](?P<day>[0-9]{1,2})',
        # 15.01.2010 (day/month are swapped by parse_dates if that is invalid)
        r'(?P<day>[0-9]{1,2})[./ -](?P<month>[0-9]{1,2})[./ -]'
        r'(?P<year>[0-9]{4}|[0-9]{2})',
        # 20100115 10:20:30
        r'(?P<year>[0-9]{4})(?P<month>[0-9]{2})(?P<day>[0-9]{2})\s'
        r'(' + _TIME + r')',
    ),
    "_months": {
        'jan': 1, 'january': 1,
        'feb': 2, 'february': 2,
        'mar': 3, 'march': 3,
        'apr': 4, 'april': 4,
        'may': 5,
        'jun': 6, 'june': 6,
        'jul': 7, 'july': 7,
        'aug': 8, 'august': 8,
        'sep': 9, 'sept': 9, 'september': 9,
        'oct': 10, 'october': 10,
        'nov': 11, 'november': 11,
        'dec': 12, 'december': 12,
    },
    r".*\.ru$": {
        'domain_name': r'domain:\s*(?P<val>.+)',
        'registrar': r'registrar:\s*(?P<val>.+)',
        'creation_date': r'created:\s*(?P<val>.+)',
        'expiration_date': r'paid-till:\s*(?P<val>.+)',
        'name_servers': r'nserver:\s*(?P<val>.+)',
        'status': r'state:\s*(?P<val>.+)',
        'updated_date': r'Last updated on (?P<val>.+) [a-z]{3}',
    },
    r".*\.ee$": {
        'domain_name': r'domain:\s*(?P<val>.+)',
        'registrar': r'registrar:\s*(?P<val>.+)',
        'creation_date': r'registered:\s*(?P<val>.+)',
        'expiration_date': r'expire:\s*(?P<val>.+)',
        'name_servers': r'nserver:\s*(?P<val>.+)',
        'status': r'state:\s*(?P<val>.+)',
    },
    r".*\.si$": {
        'domain_name': r'domain:\s*(?P<val>.+)',
        'registrar': r'registrar:\s*(?P<val>.+)',
        'creation_date': r'created:\s*(?P<val>.+)',
        'expiration_date': r'expire:\s*(?P<val>.+)',
        'name_servers': r'nameserver:\s*(?P<val>.+)',
        'status': r'status:\s*(?P<val>.+)',
    },
    r".*\.at$": {
        'domain_name': r'domain:\s*(?P<val>.+)',
        'name_servers': r'nserver:\s*(?P<val>.+)',
        'status': r'state:\s*(?P<val>.+)',
        'updated_date': r'changed:\s*(?P<val>.+)',
    },
}


def _record_match(data, key, pattern, line):
    """Append the stripped ``val`` group of *pattern* in *line* to data[key].

    Nothing is recorded when the pattern does not match or the captured
    value is empty after stripping.
    """
    result = re.search(pattern, line, re.IGNORECASE)
    if result is None:
        return
    val = result.group("val").strip()
    if val:
        data.setdefault(key, []).append(val)


def whois(domain):
    """Run ``jwhois`` for *domain* and parse its output.

    Returns a tuple ``(out, data)``: ``out`` is the text printed by
    ``jwhois`` on stdout and ``data`` maps field names to lists of values.
    Every field listed in ``grammar['_fallback']`` is always present and is
    ``None`` when nothing was found; the three date fields hold the result
    of ``parse_dates`` (a list of ``datetime`` objects or ``None``).

    Any exception raised by ``subprocess.Popen`` (for instance when the
    ``jwhois`` executable cannot be started) propagates to the caller.
    """
    # Pick the TLD-specific ruleset whose key matches the domain, if any.
    ruleset = None
    for regex, rules in grammar.items():
        if not regex.startswith("_") and re.match(regex, domain):
            ruleset = rules
    if ruleset is None:
        ruleset = grammar['_default']

    process = subprocess.Popen(["jwhois", domain],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, _error = process.communicate()
    # On Python 3 the pipe yields bytes, which cannot be searched with text
    # patterns; undecodable bytes are replaced rather than aborting the parse.
    if not isinstance(out, str):
        out = out.decode("utf-8", "replace")
    lines = out.splitlines()

    data = {}
    for line in lines:
        for rule_key, rule_regex in ruleset.items():
            _record_match(data, rule_key, rule_regex, line)

    # Run through fallback detection to gather missing info.
    for rule_key, rule_regexes in grammar['_fallback'].items():
        if rule_key not in data:
            for line in lines:
                for regex in rule_regexes:
                    _record_match(data, rule_key, regex, line)
        # Fill all still-missing values with None so the lookups below and
        # callers can rely on the key being present.
        if rule_key not in data:
            data[rule_key] = None

    for key in ('expiration_date', 'creation_date', 'updated_date'):
        if data[key] is not None:
            data[key] = parse_dates(remove_duplicates(data[key]))

    for key in ('name_servers', 'emails', 'registrar'):
        if data[key] is not None:
            data[key] = remove_duplicates(data[key])

    return out, data


def _optional_int(match, group):
    """Return the named group of *match* as an int, or 0 when unavailable.

    ``IndexError`` means the pattern has no such group; ``TypeError`` means
    the group exists but did not take part in the match (its value is None).
    """
    try:
        return int(match.group(group))
    except (IndexError, TypeError):
        return 0


def _parse_date(text):
    """Convert one date string to a ``datetime``, or None if it is unusable.

    Only the first layout in ``grammar['_dateformats']`` that matches the
    start of *text* is considered.
    """
    for rule in grammar['_dateformats']:
        result = re.match(rule, text, re.IGNORECASE)
        if result is None:
            continue

        # The patterns only admit digits for these two groups.
        year = int(result.group("year"))
        day = int(result.group("day"))

        # Detect and correct shorthand year notation.
        if year < 60:
            year += 2000
        elif year < 100:
            year += 1900

        # Some WHOIS servers present the name of the month instead of a number.
        month_text = result.group("month")
        try:
            month = int(month_text)
        except ValueError:
            # Unknown names map to 0, which datetime rejects below.
            month = grammar['_months'].get(month_text.lower(), 0)

        hour = _optional_int(result, "hour")
        minute = _optional_int(result, "minute")
        second = _optional_int(result, "second")

        # Try the fields as captured, then with day and month exchanged for
        # registrars that do not use an ISO-style ordering.
        for month_value, day_value in ((month, day), (day, month)):
            try:
                return datetime.datetime(year, month_value, day_value,
                                         hour, minute, second)
            except ValueError:
                continue
        return None
    return None


def parse_dates(dates):
    """Parse an iterable of date strings into ``datetime`` objects.

    Strings that match no known layout, or that match but do not form a
    valid calendar date, are skipped.  Returns the list of parsed dates in
    input order, or ``None`` when no string could be parsed.
    """
    parsed_dates = []
    for date in dates:
        parsed = _parse_date(date)
        if parsed is not None:
            parsed_dates.append(parsed)
    return parsed_dates if parsed_dates else None


def remove_duplicates(data):
    """Return a new list with the entries of *data*, first occurrences only.

    Order is preserved.  List membership (rather than a set) is used so
    entries are only required to support equality comparison.
    """
    cleaned_list = []
    for entry in data:
        if entry not in cleaned_list:
            cleaned_list.append(entry)
    return cleaned_list