"""Regex-driven parser for raw WHOIS responses.

``parse_raw_whois`` is the entry point: it takes the list of raw response
segments and returns a dict with the extracted fields, parsed dates and
contact records.

NOTE(review): the named-group identifiers in the patterns below were
reconstructed. ``val`` and ``handle`` are the keys the code reads, the
date groups follow from their position, and the contact groups follow the
field labels. The groups on unlabelled lines (OVH, internet.bs, nic.ch,
nic.io, AFNIC ``address:`` lines) and ``country_code`` should be confirmed
against real WHOIS output, as should the amount of leading whitespace in
the indented contact formats.
"""
import datetime
import re

grammar = {
    # Field name -> patterns tried (case-insensitively) against every line.
    # Each pattern captures the field value in the group named "val".
    "_data": {
        'status': [
            r'Status\s*:\s?(?P<val>.+)',
            r'state:\s*(?P<val>.+)',
        ],
        'creation_date': [
            r'Creation Date:\s?(?P<val>.+)',
            r'Created on:\s?(?P<val>.+)',
            r'Created on\s?[.]*:\s?(?P<val>.+)\.',
            r'Date Registered\s?[.]*:\s?(?P<val>.+)',
            r'Domain Created\s?[.]*:\s?(?P<val>.+)',
            r'Domain registered\s?[.]*:\s?(?P<val>.+)',
            r'Domain record activated\s?[.]*:\s*?(?P<val>.+)',
            r'Record created on\s?[.]*:?\s*?(?P<val>.+)',
            r'Record created\s?[.]*:?\s*?(?P<val>.+)',
            r'Created\s?[.]*:?\s*?(?P<val>.+)',
            r'Registered on\s?[.]*:?\s*?(?P<val>.+)',
            r'Registered\s?[.]*:?\s*?(?P<val>.+)',
            r'Domain Create Date\s?[.]*:?\s*?(?P<val>.+)',
            r'Domain Registration Date\s?[.]*:?\s*?(?P<val>.+)',
            r'created:\s*(?P<val>.+)',
            r'registered:\s*(?P<val>.+)',
        ],
        'expiration_date': [
            r'Expiration Date:\s?(?P<val>.+)',
            r'Expires on:\s?(?P<val>.+)',
            r'Expires on\s?[.]*:\s?(?P<val>.+)\.',
            r'Expiry Date\s?[.]*:\s?(?P<val>.+)',
            r'Expiry\s*:\s?(?P<val>.+)',
            r'Domain Currently Expires\s?[.]*:\s?(?P<val>.+)',
            r'Record will expire on\s?[.]*:\s?(?P<val>.+)',
            r'Domain expires\s?[.]*:\s*?(?P<val>.+)',
            r'Record expires on\s?[.]*:?\s*?(?P<val>.+)',
            r'Record expires\s?[.]*:?\s*?(?P<val>.+)',
            r'Expires\s?[.]*:?\s*?(?P<val>.+)',
            r'Expire Date\s?[.]*:?\s*?(?P<val>.+)',
            r'Expired\s?[.]*:?\s*?(?P<val>.+)',
            r'Domain Expiration Date\s?[.]*:?\s*?(?P<val>.+)',
            r'paid-till:\s*(?P<val>.+)',
            r'expire:\s*(?P<val>.+)',
        ],
        'updated_date': [
            r'Updated Date:\s?(?P<val>.+)',
            # Disabled in the original source:
            # r'Database last updated on\s?[.]*:?\s*?(?P<val>.+)\s[a-z]+\.?',
            r'Record last updated on\s?[.]*:?\s?(?P<val>.+)\.',
            r'Domain record last updated\s?[.]*:\s*?(?P<val>.+)',
            r'Domain Last Updated\s?[.]*:\s*?(?P<val>.+)',
            r'Last updated on:\s?(?P<val>.+)',
            r'Date Modified\s?[.]*:\s?(?P<val>.+)',
            r'Last Modified\s?[.]*:\s?(?P<val>.+)',
            r'Domain Last Updated Date\s?[.]*:\s?(?P<val>.+)',
            r'Record last updated\s?[.]*:\s?(?P<val>.+)',
            r'Modified\s?[.]*:\s?(?P<val>.+)',
            r'changed:\s*(?P<val>.+)',
            r'Last Update\s?[.]*:\s?(?P<val>.+)',
            r'Last updated on (?P<val>.+) [a-z]{3}',
            r'Last update of whois database:\s?[a-z]{3}, (?P<val>.+) [a-z]{3}',
        ],
        'registrar': [
            r'registrar:\s*(?P<val>.+)',
            r'Registrar:\s*(?P<val>.+)',
            r'Registered through:\s?(?P<val>.+)',
            r'Registrar Name:\s?(?P<val>.+)',
            r'Record maintained by:\s?(?P<val>.+)',
            r'Registration Service Provided By:\s?(?P<val>.+)',
            r'Registrar of Record:\s?(?P<val>.+)',
            r'\tName:\t\s(?P<val>.+)',
        ],
        'whois_server': [
            r'Whois Server:\s?(?P<val>.+)',
            r'Registrar Whois:\s?(?P<val>.+)',
        ],
        'name_servers': [
            r'Name Server:\s?(?P<val>.+)',
            r'(?P<val>[a-z]*d?ns[0-9]+([a-z]{3})?\.([a-z0-9-]+\.)+[a-z0-9]+)',
            r'nameserver:\s*(?P<val>.+)',
            # "[" is escaped inside the class to avoid the nested-set
            # FutureWarning; the set of excluded characters is unchanged.
            r'nserver:\s*(?P<val>[^\[\s]+)',
            r'DNS[0-9]+:\s*(?P<val>.+)',
            r'ns[0-9]+:\s*(?P<val>.+)',
            r'NS [0-9]+\s*:\s*(?P<val>.+)',
            r'(?P<val>[a-z0-9-]+\.d?ns[0-9]*\.([a-z0-9-]+\.)+[a-z0-9]+)',
            r'(?P<val>([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})',
            r'[^a-z0-9.-](?P<val>d?ns\.([a-z0-9-]+\.)+[a-z0-9]+)',
        ],
        'emails': [
            r'(?P<val>[\w.-]+@[\w.-]+\.[\w]{2,4})',
            r'(?P<val>[\w.-]+\sAT\s[\w.-]+\sDOT\s[\w]{2,4})',
        ],
    },
    # Date layouts, tried in order with re.match (anchored at the start).
    "_dateformats": (
        r'(?P<day>[0-9]{1,2})[./ -](?P<month>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P<year>[0-9]{4}|[0-9]{2})'
        r'(\s+(?P<hour>[0-9]{1,2})[:.](?P<minute>[0-9]{1,2})[:.](?P<second>[0-9]{1,2}))?',
        r'[a-z]{3}\s(?P<month>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P<day>[0-9]{1,2})'
        r'(\s+(?P<hour>[0-9]{1,2})[:.](?P<minute>[0-9]{1,2})[:.](?P<second>[0-9]{1,2}))?'
        r'\s[a-z]{3}\s(?P<year>[0-9]{4}|[0-9]{2})',
        r'(?P<year>[0-9]{4})[./-](?P<month>[0-9]{1,2})[./-](?P<day>[0-9]{1,2})',
        r'(?P<day>[0-9]{1,2})[./ -](?P<month>[0-9]{1,2})[./ -](?P<year>[0-9]{4}|[0-9]{2})',
        r'(?P<year>[0-9]{4})(?P<month>[0-9]{2})(?P<day>[0-9]{2})\s((?P<hour>[0-9]{1,2})[:.](?P<minute>[0-9]{1,2})[:.](?P<second>[0-9]{1,2}))',
    ),
    "_months": {
        'jan': 1, 'january': 1,
        'feb': 2, 'february': 2,
        'mar': 3, 'march': 3,
        'apr': 4, 'april': 4,
        'may': 5,
        'jun': 6, 'june': 6,
        'jul': 7, 'july': 7,
        'aug': 8, 'august': 8,
        'sep': 9, 'sept': 9, 'september': 9,
        'oct': 10, 'october': 10,
        'nov': 11, 'november': 11,
        'dec': 12, 'december': 12,
    },
}

# Fields whose raw string values are de-duplicated and converted to datetimes.
_DATE_FIELDS = ('expiration_date', 'creation_date', 'updated_date')

_CONTACT_CATEGORIES = ("registrant", "tech", "admin", "billing")

_REGISTRANT_REGEXES = [
    (r"Registrant:\n (?P<name>.+)\n (?P<street1>.+)\n(?: (?P<street2>.*)\n)?(?: (?P<street3>.*)\n)?"
     r" (?P<postalcode>.+), (?P<city>.+)\n (?P<country>.+)\n (?P<phone>.+)\n (?P<email>.+)\n\n"),  # OVH
    (r"Registrant ID:(?P<handle>.+)\nRegistrant Name:(?P<name>.*)\nRegistrant Organization:(?P<organization>.*)\n"
     r"Registrant Street1:(?P<street1>.*)\nRegistrant Street2:(?P<street2>.*)\nRegistrant Street3:(?P<street3>.*)\n"
     r"Registrant City:(?P<city>.*)\nRegistrant State/Province:(?P<state>.*)\nRegistrant Postal Code:(?P<postalcode>.*)\n"
     r"Registrant Country:(?P<country>.*)\nRegistrant Phone:(?P<phone>.*)\nRegistrant Phone Ext.:(?P<phone_ext>.*)\n"
     r"Registrant FAX:(?P<fax>.*)\nRegistrant FAX Ext.:(?P<fax_ext>.*)\nRegistrant Email:(?P<email>.*)"),  # Public Interest Registry (.org)
    (r"Registrant ID:\s*(?P<handle>.+)\nRegistrant Name:\s*(?P<name>.+)\nRegistrant Organization:\s*(?P<organization>.*)\n"
     r"Registrant Address1:\s*(?P<street1>.+)\nRegistrant Address2:\s*(?P<street2>.*)\nRegistrant City:\s*(?P<city>.+)\n"
     r"Registrant State/Province:\s*(?P<state>.+)\nRegistrant Postal Code:\s*(?P<postalcode>.+)\n"
     r"Registrant Country:\s*(?P<country>.+)\nRegistrant Country Code:\s*(?P<country_code>.+)\n"
     r"Registrant Phone Number:\s*(?P<phone>.+)\nRegistrant Email:\s*(?P<email>.+)\n"),  # .CO Internet
    (r"Registrant Contact: (?P<handle>.+)\nRegistrant Organization: (?P<organization>.+)\nRegistrant Name: (?P<name>.+)\n"
     r"Registrant Street: (?P<street>.+)\nRegistrant City: (?P<city>.+)\nRegistrant Postal Code: (?P<postalcode>.+)\n"
     r"Registrant State: (?P<state>.+)\nRegistrant Country: (?P<country>.+)\nRegistrant Phone: (?P<phone>.*)\n"
     r"Registrant Phone Ext: (?P<phone_ext>.*)\nRegistrant Fax: (?P<fax>.*)\nRegistrant Fax Ext: (?P<fax_ext>.*)\n"
     r"Registrant Email: (?P<email>.*)\n"),  # Key-Systems GmbH
    (r"Registrant Name: (?P<name>.+)\nRegistrant Organization: (?P<organization>.*)\nRegistrant Street: (?P<street1>.+)\n"
     r"(?:Registrant Street: (?P<street2>.+)\n)?Registrant City: (?P<city>.+)\nRegistrant State\/Province: (?P<state>.+)\n"
     r"Registrant Postal Code: (?P<postalcode>.+)\nRegistrant Country: (?P<country>.+)\n"
     r"(?:Registrant Phone: (?P<phone>.+)\n)?(?:Registrant Phone Ext: (?P<phone_ext>.*)\n)?"
     r"(?:Registrant Fax: (?P<fax>.+)\n)?(?:Registrant Fax Ext: (?P<fax_ext>.*)\n)?"
     r"(?:Registrant Email: (?P<email>.+)\n)?"),  # WildWestDomains, GoDaddy, Namecheap
    (r"Registrant\n (?P<name>.+)\n Email:(?P<email>.+)\n (?P<street1>.+)\n(?: (?P<street2>.+)\n)?"
     r" (?P<postalcode>.+) (?P<city>.+)\n (?P<country>.+)\n Tel: (?P<phone>.+)\n\n"),  # internet.bs
    (r"Holder of domain name:\n(?P<name>[\S\s]+)\n(?P<street>.+)\n(?P<postalcode>[A-Z0-9-]+)\s+(?P<city>.+)\n"
     r"(?P<country>.+)\nContractual Language"),  # nic.ch
    (r"\n\n(?:Owner)?\s+: (?P<name>.*)\n(?:\s+: (?P<organization>.*)\n)?\s+: (?P<street>.*)\n\s+: (?P<city>.*)\n"
     r"\s+: (?P<state>.*)\n\s+: (?P<country>.*)\n"),  # nic.io
    r"person:\s+(?P<name>.+)",  # nic.ru (person)
    r"org:\s+(?P<organization>.+)",  # nic.ru (organization)
]

_TECH_CONTACT_REGEXES = [
    (r"Technical Contact:\n (?P<name>.+)\n (?P<street1>.+)\n(?: (?P<street2>.*)\n)?(?: (?P<street3>.*)\n)?"
     r" (?P<postalcode>.+), (?P<city>.+)\n (?P<country>.+)\n (?P<phone>.+)\n (?P<email>.+)\n\n"),  # OVH
    (r"Tech ID:(?P<handle>.+)\nTech Name:(?P<name>.*)\nTech Organization:(?P<organization>.*)\n"
     r"Tech Street1:(?P<street1>.*)\nTech Street2:(?P<street2>.*)\nTech Street3:(?P<street3>.*)\n"
     r"Tech City:(?P<city>.*)\nTech State/Province:(?P<state>.*)\nTech Postal Code:(?P<postalcode>.*)\n"
     r"Tech Country:(?P<country>.*)\nTech Phone:(?P<phone>.*)\nTech Phone Ext.:(?P<phone_ext>.*)\n"
     r"Tech FAX:(?P<fax>.*)\nTech FAX Ext.:(?P<fax_ext>.*)\nTech Email:(?P<email>.*)"),  # Public Interest Registry (.org)
    (r"Technical Contact ID:\s*(?P<handle>.+)\nTechnical Contact Name:\s*(?P<name>.+)\n"
     r"Technical Contact Organization:\s*(?P<organization>.*)\nTechnical Contact Address1:\s*(?P<street1>.+)\n"
     r"Technical Contact Address2:\s*(?P<street2>.*)\nTechnical Contact City:\s*(?P<city>.+)\n"
     r"Technical Contact State/Province:\s*(?P<state>.+)\nTechnical Contact Postal Code:\s*(?P<postalcode>.+)\n"
     r"Technical Contact Country:\s*(?P<country>.+)\nTechnical Contact Country Code:\s*(?P<country_code>.+)\n"
     r"Technical Contact Phone Number:\s*(?P<phone>.+)\nTechnical Contact Email:\s*(?P<email>.+)\n"),  # .CO Internet
    (r"Tech Contact: (?P<handle>.+)\nTech Organization: (?P<organization>.+)\nTech Name: (?P<name>.+)\n"
     r"Tech Street: (?P<street>.+)\nTech City: (?P<city>.+)\nTech Postal Code: (?P<postalcode>.+)\n"
     r"Tech State: (?P<state>.+)\nTech Country: (?P<country>.+)\nTech Phone: (?P<phone>.*)\n"
     r"Tech Phone Ext: (?P<phone_ext>.*)\nTech Fax: (?P<fax>.*)\nTech Fax Ext: (?P<fax_ext>.*)\n"
     r"Tech Email: (?P<email>.*)\n"),  # Key-Systems GmbH
    (r"Tech Name: (?P<name>.+)\nTech Organization: (?P<organization>.*)\nTech Street: (?P<street1>.+)\n"
     r"(?:Tech Street: (?P<street2>.+)\n)?Tech City: (?P<city>.+)\nTech State\/Province: (?P<state>.+)\n"
     r"Tech Postal Code: (?P<postalcode>.+)\nTech Country: (?P<country>.+)\n"
     r"(?:Tech Phone: (?P<phone>.+)\n)?(?:Tech Phone Ext: (?P<phone_ext>.*)\n)?"
     r"(?:Tech Fax: (?P<fax>.+)\n)?(?:Tech Fax Ext: (?P<fax_ext>.*)\n)?"
     r"(?:Tech Email: (?P<email>.+)\n)?"),  # WildWestDomains, GoDaddy, Namecheap
    (r"Technical Contact\n (?P<name>.+)\n Email:(?P<email>.+)\n (?P<street1>.+)\n(?: (?P<street2>.+)\n)?"
     r" (?P<postalcode>.+) (?P<city>.+)\n (?P<country>.+)\n Tel: (?P<phone>.+)\n\n"),  # internet.bs
    (r"Technical contact:\n(?P<name>[\S\s]+)\n(?P<street>.+)\n(?P<postalcode>[A-Z0-9-]+)\s+(?P<city>.+)\n"
     r"(?P<country>.+)\n\n"),  # nic.ch
]

_ADMIN_CONTACT_REGEXES = [
    (r"Administrative Contact:\n (?P<name>.+)\n (?P<street1>.+)\n(?: (?P<street2>.*)\n)?(?: (?P<street3>.*)\n)?"
     r" (?P<postalcode>.+), (?P<city>.+)\n (?P<country>.+)\n (?P<phone>.+)\n (?P<email>.+)\n\n"),  # OVH
    (r"Admin ID:(?P<handle>.+)\nAdmin Name:(?P<name>.*)\nAdmin Organization:(?P<organization>.*)\n"
     r"Admin Street1:(?P<street1>.*)\nAdmin Street2:(?P<street2>.*)\nAdmin Street3:(?P<street3>.*)\n"
     r"Admin City:(?P<city>.*)\nAdmin State/Province:(?P<state>.*)\nAdmin Postal Code:(?P<postalcode>.*)\n"
     r"Admin Country:(?P<country>.*)\nAdmin Phone:(?P<phone>.*)\nAdmin Phone Ext.:(?P<phone_ext>.*)\n"
     r"Admin FAX:(?P<fax>.*)\nAdmin FAX Ext.:(?P<fax_ext>.*)\nAdmin Email:(?P<email>.*)"),  # Public Interest Registry (.org)
    (r"Administrative Contact ID:\s*(?P<handle>.+)\nAdministrative Contact Name:\s*(?P<name>.+)\n"
     r"Administrative Contact Organization:\s*(?P<organization>.*)\nAdministrative Contact Address1:\s*(?P<street1>.+)\n"
     r"Administrative Contact Address2:\s*(?P<street2>.*)\nAdministrative Contact City:\s*(?P<city>.+)\n"
     r"Administrative Contact State/Province:\s*(?P<state>.+)\nAdministrative Contact Postal Code:\s*(?P<postalcode>.+)\n"
     r"Administrative Contact Country:\s*(?P<country>.+)\nAdministrative Contact Country Code:\s*(?P<country_code>.+)\n"
     r"Administrative Contact Phone Number:\s*(?P<phone>.+)\nAdministrative Contact Email:\s*(?P<email>.+)\n"),  # .CO Internet
    (r"Admin Contact: (?P<handle>.+)\nAdmin Organization: (?P<organization>.+)\nAdmin Name: (?P<name>.+)\n"
     r"Admin Street: (?P<street>.+)\nAdmin City: (?P<city>.+)\nAdmin State: (?P<state>.+)\n"
     r"Admin Postal Code: (?P<postalcode>.+)\nAdmin Country: (?P<country>.+)\nAdmin Phone: (?P<phone>.*)\n"
     r"Admin Phone Ext: (?P<phone_ext>.*)\nAdmin Fax: (?P<fax>.*)\nAdmin Fax Ext: (?P<fax_ext>.*)\n"
     r"Admin Email: (?P<email>.*)\n"),  # Key-Systems GmbH
    (r"Admin Name: (?P<name>.+)\nAdmin Organization: (?P<organization>.*)\nAdmin Street: (?P<street1>.+)\n"
     r"(?:Admin Street: (?P<street2>.+)\n)?Admin City: (?P<city>.+)\nAdmin State\/Province: (?P<state>.+)\n"
     r"Admin Postal Code: (?P<postalcode>.+)\nAdmin Country: (?P<country>.+)\n"
     r"(?:Admin Phone: (?P<phone>.+)\n)?(?:Admin Phone Ext: (?P<phone_ext>.*)\n)?"
     r"(?:Admin Fax: (?P<fax>.+)\n)?(?:Admin Fax Ext: (?P<fax_ext>.*)\n)?"
     r"(?:Admin Email: (?P<email>.+)\n)?"),  # WildWestDomains, GoDaddy, Namecheap
    (r"Administrative Contact\n (?P<name>.+)\n Email:(?P<email>.+)\n (?P<street1>.+)\n(?: (?P<street2>.+)\n)?"
     r" (?P<postalcode>.+) (?P<city>.+)\n (?P<country>.+)\n Tel: (?P<phone>.+)\n\n"),  # internet.bs
]

_BILLING_CONTACT_REGEXES = [
    (r"Billing Contact ID:\s*(?P<handle>.+)\nBilling Contact Name:\s*(?P<name>.+)\n"
     r"Billing Contact Organization:\s*(?P<organization>.*)\nBilling Contact Address1:\s*(?P<street1>.+)\n"
     r"Billing Contact Address2:\s*(?P<street2>.*)\nBilling Contact City:\s*(?P<city>.+)\n"
     r"Billing Contact State/Province:\s*(?P<state>.+)\nBilling Contact Postal Code:\s*(?P<postalcode>.+)\n"
     r"Billing Contact Country:\s*(?P<country>.+)\nBilling Contact Country Code:\s*(?P<country_code>.+)\n"
     r"Billing Contact Phone Number:\s*(?P<phone>.+)\nBilling Contact Email:\s*(?P<email>.+)\n"),  # .CO Internet
    (r"Billing Contact: (?P<handle>.+)\nBilling Organization: (?P<organization>.+)\nBilling Name: (?P<name>.+)\n"
     r"Billing Street: (?P<street>.+)\nBilling City: (?P<city>.+)\nBilling Postal Code: (?P<postalcode>.+)\n"
     r"Billing State: (?P<state>.+)\nBilling Country: (?P<country>.+)\nBilling Phone: (?P<phone>.*)\n"
     r"Billing Phone Ext: (?P<phone_ext>.*)\nBilling Fax: (?P<fax>.*)\nBilling Fax Ext: (?P<fax_ext>.*)\n"
     r"Billing Email: (?P<email>.*)\n"),  # Key-Systems GmbH
    (r"Billing Contact:\n (?P<name>.+)\n (?P<street1>.+)\n(?: (?P<street2>.*)\n)?(?: (?P<street3>.*)\n)?"
     r" (?P<postalcode>.+), (?P<city>.+)\n (?P<country>.+)\n (?P<phone>.+)\n (?P<email>.+)\n\n"),  # OVH
]

# Some registries use NIC handle references instead of directly listing
# contacts; these patterns match the handle definitions themselves.
_NIC_CONTACT_REGEXES = [
    (r"personname:\s*(?P<name>.+)\norganization:\s*(?P<organization>.+)\nstreet address:\s*(?P<street>.+)\n"
     r"postal code:\s*(?P<postalcode>.+)\ncity:\s*(?P<city>.+)\ncountry:\s*(?P<country>.+)\n"
     r"phone:\s*(?P<phone>.+)\nfax-no:\s*(?P<fax>.+)\ne-mail:\s*(?P<email>.+)\n"
     r"nic-hdl:\s*(?P<handle>.+)\nchanged:\s*(?P<changedate>.+)"),  # nic.at
    (r"nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?"
     r"(?:address:\s*(?P<street1>.+)\naddress:\s*(?P<street2>.+)\naddress:\s*(?P<street3>.+)\n"
     r"address:\s*(?P<country>.+)\n)?(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?"
     r"(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?"
     r"changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n"),  # AFNIC madness without country field
    (r"nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?"
     r"(?:address:\s*(?P<street1>.+)\n)?(?:address:\s*(?P<street2>.+)\n)?(?:address:\s*(?P<street3>.+)\n)?"
     r"(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?"
     r"(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?"
     r"changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n"),  # AFNIC madness any country -at all-
    (r"nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?"
     r"(?:address:\s*(?P<street1>.+)\n)?(?:address:\s*(?P<street2>.+)\n)?(?:address:\s*(?P<street3>.+)\n)?"
     r"(?:address:\s*(?P<street4>.+)\n)?country:\s*(?P<country>.+)\n"
     r"(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?"
     r"(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?"
     r"changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n"),  # AFNIC madness with country field
]

# Contact category -> patterns that reference a NIC handle for that category.
_NIC_CONTACT_REFERENCES = {
    "registrant": [
        r"registrant:\s*(?P<handle>.+)",  # nic.at
        r"holder-c:\s*(?P<handle>.+)",  # AFNIC
    ],
    "tech": [
        r"tech-c:\s*(?P<handle>.+)",  # nic.at, AFNIC
    ],
    "admin": [
        r"admin-c:\s*(?P<handle>.+)",  # nic.at, AFNIC
    ],
}


def parse_raw_whois(raw_data):
    """Extract structured fields from raw WHOIS response segments.

    raw_data is an iterable of response strings. Returns a dict with one
    key per ``grammar['_data']`` field (a list of values, or None when
    nothing matched), plus ``contacts`` (see ``parse_registrants``) and
    ``raw`` (the segments with carriage returns removed). The three date
    fields hold ``datetime`` objects, or None when nothing could be parsed.
    """
    data = {}
    # Carriage returns would otherwise end up inside captured values.
    raw_data = [segment.replace("\r", "") for segment in raw_data]

    for segment in raw_data:
        lines = segment.splitlines()
        for rule_key, rule_regexes in grammar['_data'].items():
            # A field filled by an earlier segment is not extended by
            # later segments.
            if rule_key in data:
                continue
            for line in lines:
                for regex in rule_regexes:
                    result = re.search(regex, line, re.IGNORECASE)
                    if result is None:
                        continue
                    val = result.group("val").strip()
                    if val != "":
                        data.setdefault(rule_key, []).append(val)

    # Fill all missing values with None
    for rule_key in grammar['_data']:
        data.setdefault(rule_key, None)

    data["contacts"] = parse_registrants(raw_data)

    for date_key in _DATE_FIELDS:
        if data[date_key] is not None:
            data[date_key] = parse_dates(remove_duplicates(data[date_key]))

    if data['name_servers'] is not None:
        data['name_servers'] = remove_duplicates(
            [ns.rstrip(".") for ns in data['name_servers']])

    for list_key in ('emails', 'registrar'):
        if data[list_key] is not None:
            data[list_key] = remove_duplicates(data[list_key])

    # Remove e-mail addresses if they are already listed for any contact.
    known_emails = []
    for category in _CONTACT_CATEGORIES:
        contact = data["contacts"][category]
        if contact is not None and "email" in contact:
            known_emails.append(contact["email"])

    if data['emails'] is not None:
        data['emails'] = [email for email in data["emails"]
                          if email not in known_emails]

    data["raw"] = raw_data
    return data


def _optional_int(match, group):
    """Return the named group as an int, or 0 if it is absent or unmatched."""
    try:
        return int(match.group(group))
    except (IndexError, TypeError):
        # IndexError: the pattern has no such group.
        # TypeError: the optional group did not participate (value is None).
        return 0


def _parse_date(date):
    """Parse one date string; return a datetime, or None if not parseable.

    The first ``grammar['_dateformats']`` pattern that matches decides the
    outcome; later patterns are not tried.
    """
    for rule in grammar['_dateformats']:
        result = re.match(rule, date, re.IGNORECASE)
        if result is None:
            continue

        year = int(result.group("year"))
        day = int(result.group("day"))

        # Detect and correct shorthand year notation
        if year < 60:
            year += 2000
        elif year < 100:
            year += 1900

        # Some WHOIS servers present the name of the month instead.
        month_text = result.group("month")
        try:
            month = int(month_text)
        except ValueError:
            # Unknown names map to 0, which datetime rejects below.
            month = grammar['_months'].get(month_text.lower(), 0)

        hour = _optional_int(result, "hour")
        minute = _optional_int(result, "minute")
        second = _optional_int(result, "second")

        # Non-ISO formats may have day and month the other way around, so
        # retry swapped before giving up.
        for month_arg, day_arg in ((month, day), (day, month)):
            try:
                return datetime.datetime(year, month_arg, day_arg,
                                         hour, minute, second)
            except ValueError:
                continue
        return None
    return None


def parse_dates(dates):
    """Parse date strings into datetimes, skipping unparseable entries.

    Returns the list of parsed datetimes in input order, or None when no
    entry could be parsed.
    """
    parsed_dates = []
    for date in dates:
        parsed = _parse_date(date)
        if parsed is not None:
            parsed_dates.append(parsed)
    return parsed_dates if parsed_dates else None


def remove_duplicates(data):
    """Return a new list with duplicates removed, keeping first-seen order."""
    cleaned_list = []
    for entry in data:
        if entry not in cleaned_list:
            cleaned_list.append(entry)
    return cleaned_list


def _match_contact(regexes, segments):
    """Return the groupdict of the last pattern that matches, or None.

    For every pattern only its first matching segment is used. A match
    from a later pattern replaces the result of an earlier one.
    """
    contact = None
    for regex in regexes:
        for segment in segments:
            match = re.search(regex, segment)
            if match is not None:
                contact = match.groupdict()
                break
    return contact


def _clean_contact(contact):
    """Normalise one contact dict in place."""
    # Copy the keys first: entries are deleted while looping.
    for key in list(contact.keys()):
        if contact[key] is None or contact[key] == "":
            del contact[key]

    if "phone_ext" in contact and "phone" in contact:
        contact["phone"] += " ext. %s" % contact["phone_ext"]
        del contact["phone_ext"]

    if "street1" in contact:
        # Merge consecutive street1, street2, ... into one "street" value.
        street_items = []
        i = 1
        while ("street%d" % i) in contact:
            street_items.append(contact.pop("street%d" % i))
            i += 1
        contact["street"] = "\n".join(street_items)

    if 'changedate' in contact:
        parsed = parse_dates([contact['changedate']])
        # An unparseable change date is left as the raw string.
        if parsed:
            contact['changedate'] = parsed[0]

    if ('street' in contact and "\n" in contact["street"]
            and 'postalcode' not in contact):
        # Deal with WHOIS servers that don't properly delimit address
        # data (AFNIC): the last address line holds "<postalcode> <city>".
        lines = [x.strip() for x in contact["street"].splitlines()]
        if " " in lines[-1]:
            postal_code, city = lines[-1].split(" ", 1)
            contact["postalcode"] = postal_code
            contact["city"] = city
            contact["street"] = "\n".join(lines[:-1])


def parse_registrants(data):
    """Extract registrant, tech, admin and billing contacts.

    data is a list of raw WHOIS segments. Returns a dict with the keys
    "registrant", "tech", "admin" and "billing"; each value is a dict of
    the captured, non-empty fields, or None when no pattern matched.
    """
    contacts = {
        "registrant": _match_contact(_REGISTRANT_REGEXES, data),
        "tech": _match_contact(_TECH_CONTACT_REGEXES, data),
        "admin": _match_contact(_ADMIN_CONTACT_REGEXES, data),
        "billing": _match_contact(_BILLING_CONTACT_REGEXES, data),
    }

    # Find NIC handle contact definitions
    handle_contacts = []
    for regex in _NIC_CONTACT_REGEXES:
        for segment in data:
            for match in re.finditer(regex, segment):
                handle_contacts.append(match.groupdict())

    # Find NIC handle references and merge in the matching definitions.
    # A reference replaces any directly-listed contact of that category.
    for category, reference_regexes in _NIC_CONTACT_REFERENCES.items():
        for regex in reference_regexes:
            for segment in data:
                match = re.search(regex, segment)
                if match is None:
                    continue
                data_reference = match.groupdict()
                for contact in handle_contacts:
                    if contact["handle"] == data_reference["handle"]:
                        data_reference.update(contact)
                contacts[category] = data_reference
                break

    # Post-processing
    for contact in contacts.values():
        if contact is not None:
            _clean_contact(contact)

    return {
        "registrant": contacts["registrant"],
        "tech": contacts["tech"],
        "admin": contacts["admin"],
        "billing": contacts["billing"],
    }