from __future__ import print_function import re, sys, datetime, csv, pkgutil from . import net, shared try: from io import StringIO except ImportError: from cStringIO import StringIO def pkgdata(name): data = pkgutil.get_data("pythonwhois", name) if sys.version_info < (3, 0): return data else: return data.decode("utf-8") def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False): try: if is_dict: reader = csv.DictReader(pkgdata(filename).splitlines()) else: reader = csv.reader(pkgdata(filename).splitlines()) for line in reader: destination[line[abbrev_key]] = line[name_key] except IOError as e: pass airports = {} countries = {} states_au = {} states_us = {} states_ca = {} try: reader = csv.reader(pkgdata("airports.dat").splitlines()) for line in reader: airports[line[4]] = line[2] airports[line[5]] = line[2] except IOError as e: # The distributor likely removed airports.dat for licensing reasons. We'll just leave an empty dict. pass read_dataset("countries.dat", countries, "iso", "name", is_dict=True) read_dataset("countries3.dat", countries, "iso3", "name", is_dict=True) read_dataset("states_au.dat", states_au, 0, 1) read_dataset("states_us.dat", states_us, "abbreviation", "name", is_dict=True) read_dataset("states_ca.dat", states_ca, "abbreviation", "name", is_dict=True) def precompile_regexes(source, flags=0): return [re.compile(regex, flags) for regex in source] grammar = { "_data": { 'id': ['Domain ID:[ ]*(?P.+)'], 'status': ['\[Status\]\s*(?P.+)', 'Status\s*:\s?(?P.+)', '\[State\]\s*(?P.+)', '^state:\s*(?P.+)'], 'creation_date': ['\[Created on\]\s*(?P.+)', 'Created on[.]*: [a-zA-Z]+, (?P.+)', 'Creation Date:\s?(?P.+)', 'Creation date\s*:\s?(?P.+)', 'Registration Date:\s?(?P.+)', 'Created Date:\s?(?P.+)', 'Created on:\s?(?P.+)', 'Created on\s?[.]*:\s?(?P.+)\.', 'Date Registered\s?[.]*:\s?(?P.+)', 'Domain Created\s?[.]*:\s?(?P.+)', 'Domain registered\s?[.]*:\s?(?P.+)', 'Domain record activated\s?[.]*:\s*?(?P.+)', 'Record created 
on\s?[.]*:?\s*?(?P.+)', 'Record created\s?[.]*:?\s*?(?P.+)', 'Created\s?[.]*:?\s*?(?P.+)', 'Registered on\s?[.]*:?\s*?(?P.+)', 'Registered\s?[.]*:?\s*?(?P.+)', 'Domain Create Date\s?[.]*:?\s*?(?P.+)', 'Domain Registration Date\s?[.]*:?\s*?(?P.+)', 'created:\s*(?P.+)', '\[Registered Date\]\s*(?P.+)', 'created-date:\s*(?P.+)', 'Domain Name Commencement Date: (?P.+)', 'registered:\s*(?P.+)', 'registration:\s*(?P.+)'], 'expiration_date': ['\[Expires on\]\s*(?P.+)', 'Registrar Registration Expiration Date:[ ]*(?P.+)-[0-9]{4}', 'Expires on[.]*: [a-zA-Z]+, (?P.+)', 'Expiration Date:\s?(?P.+)', 'Expiration date\s*:\s?(?P.+)', 'Expires on:\s?(?P.+)', 'Expires on\s?[.]*:\s?(?P.+)\.', 'Exp(?:iry)? Date\s?[.]*:\s?(?P.+)', 'Expiry\s*:\s?(?P.+)', 'Domain Currently Expires\s?[.]*:\s?(?P.+)', 'Record will expire on\s?[.]*:\s?(?P.+)', 'Domain expires\s?[.]*:\s*?(?P.+)', 'Record expires on\s?[.]*:?\s*?(?P.+)', 'Record expires\s?[.]*:?\s*?(?P.+)', 'Expires\s?[.]*:?\s*?(?P.+)', 'Expire Date\s?[.]*:?\s*?(?P.+)', 'Expired\s?[.]*:?\s*?(?P.+)', 'Domain Expiration Date\s?[.]*:?\s*?(?P.+)', 'paid-till:\s*(?P.+)', 'expiration_date:\s*(?P.+)', 'expire-date:\s*(?P.+)', 'renewal:\s*(?P.+)', 'expire:\s*(?P.+)'], 'updated_date': ['\[Last Updated\]\s*(?P.+)', 'Record modified on[.]*: (?P.+) [a-zA-Z]+', 'Record last updated on[.]*: [a-zA-Z]+, (?P.+)', 'Updated Date:\s?(?P.+)', 'Updated date\s*:\s?(?P.+)', #'Database last updated on\s?[.]*:?\s*?(?P.+)\s[a-z]+\.?', 'Record last updated on\s?[.]*:?\s?(?P.+)\.', 'Domain record last updated\s?[.]*:\s*?(?P.+)', 'Domain Last Updated\s?[.]*:\s*?(?P.+)', 'Last updated on:\s?(?P.+)', 'Date Modified\s?[.]*:\s?(?P.+)', 'Last Modified\s?[.]*:\s?(?P.+)', 'Domain Last Updated Date\s?[.]*:\s?(?P.+)', 'Record last updated\s?[.]*:\s?(?P.+)', 'Modified\s?[.]*:\s?(?P.+)', '(C|c)hanged:\s*(?P.+)', 'last_update:\s*(?P.+)', 'Last Update\s?[.]*:\s?(?P.+)', 'Last updated on (?P.+) [a-z]{3,4}', 'Last updated:\s*(?P.+)', 'last-updated:\s*(?P.+)', '\[Last Update\]\s*(?P.+) 
\([A-Z]+\)', 'Last update of whois database:\s?[a-z]{3}, (?P.+) [a-z]{3,4}'], 'registrar': ['registrar:\s*(?P.+)', 'Registrar:\s*(?P.+)', 'Sponsoring Registrar Organization:\s*(?P.+)', 'Registered through:\s?(?P.+)', 'Registrar Name[.]*:\s?(?P.+)', 'Record maintained by:\s?(?P.+)', 'Registration Service Provided By:\s?(?P.+)', 'Registrar of Record:\s?(?P.+)', 'Domain Registrar :\s?(?P.+)', 'Registration Service Provider: (?P.+)', '\tName:\t\s(?P.+)'], 'whois_server': ['Whois Server:\s?(?P.+)', 'Registrar Whois:\s?(?P.+)'], 'nameservers': ['Name Server:[ ]*(?P[^ ]+)', 'Nameservers:[ ]*(?P[^ ]+)', '(?<=[ .]{2})(?P([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})', 'nameserver:\s*(?P.+)', 'nserver:\s*(?P[^[\s]+)', 'Name Server[.]+ (?P[^[\s]+)', 'Hostname:\s*(?P[^\s]+)', 'DNS[0-9]+:\s*(?P.+)', ' DNS:\s*(?P.+)', 'ns[0-9]+:\s*(?P.+)', 'NS [0-9]+\s*:\s*(?P.+)', '\[Name Server\]\s*(?P.+)', '(?<=[ .]{2})(?P[a-z0-9-]+\.d?ns[0-9]*\.([a-z0-9-]+\.)+[a-z0-9]+)', '(?<=[ .]{2})(?P([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})', '(?<=[ .]{2})[^a-z0-9.-](?Pd?ns\.([a-z0-9-]+\.)+[a-z0-9]+)', 'Nserver:\s*(?P.+)'], 'emails': ['(?P[\w.-]+@[\w.-]+\.[\w]{2,6})', # Really need to fix this, much longer TLDs now exist... 
'(?P[\w.-]+\sAT\s[\w.-]+\sDOT\s[\w]{2,6})'] }, "_dateformats": ( '(?P[0-9]{1,2})[./ -](?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P[0-9]{4}|[0-9]{2})' '(\s+(?P[0-9]{1,2})[:.](?P[0-9]{1,2})[:.](?P[0-9]{1,2}))?', '[a-z]{3}\s(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P[0-9]{1,2})(\s+(?P[0-9]{1,2})[:.](?P[0-9]{1,2})[:.](?P[0-9]{1,2}))?\s[a-z]{3}\s(?P[0-9]{4}|[0-9]{2})', '[a-zA-Z]+\s(?P[0-9]{1,2})(?:st|nd|rd|th)\s(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\s(?P[0-9]{4})', '(?P[0-9]{4})[./-]?(?P[0-9]{2})[./-]?(?P[0-9]{2})(\s|T|/)((?P[0-9]{1,2})[:.-](?P[0-9]{1,2})[:.-](?P[0-9]{1,2}))', '(?P[0-9]{4})[./-](?P[0-9]{1,2})[./-](?P[0-9]{1,2})', '(?P[0-9]{1,2})[./ -](?P[0-9]{1,2})[./ -](?P[0-9]{4}|[0-9]{2})', '(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (?P[0-9]{1,2}),? (?P[0-9]{4})', '(?P[0-9]{1,2})-(?PJanuary|February|March|April|May|June|July|August|September|October|November|December)-(?P[0-9]{4})', ), "_months": { 'jan': 1, 'january': 1, 'feb': 2, 'february': 2, 'mar': 3, 'march': 3, 'apr': 4, 'april': 4, 'may': 5, 'jun': 6, 'june': 6, 'jul': 7, 'july': 7, 'aug': 8, 'august': 8, 'sep': 9, 'sept': 9, 'september': 9, 'oct': 10, 'october': 10, 'nov': 11, 'november': 11, 'dec': 12, 'december': 12 } } def preprocess_regex(regex): # Fix for #2; prevents a ridiculous amount of varying size permutations. regex = re.sub(r"\\s\*\(\?P<([^>]+)>\.\+\)", r"\s*(?P<\1>\S.*)", regex) # Experimental fix for #18; removes unnecessary variable-size whitespace # matching, since we're stripping results anyway. regex = re.sub(r"\[ \]\*\(\?P<([^>]+)>\.\*\)", r"(?P<\1>.*)", regex) return regex registrant_regexes = [ " Registrant:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", # Corporate Domains, Inc. "Registrant:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? 
(?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", # OVH "(?:Registrant ID:(?P.+)\n)?Registrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Street1?:(?P.*)\n(?:Registrant Street2:(?P.*)\n)?(?:Registrant Street3:(?P.*)\n)?Registrant City:(?P.*)\nRegistrant State/Province:(?P.*)\nRegistrant Postal Code:(?P.*)\nRegistrant Country:(?P.*)\nRegistrant Phone:(?P.*)\n(?:Registrant Phone Ext.:(?P.*)\n)?(?:Registrant FAX:(?P.*)\n)?(?:Registrant FAX Ext.:(?P.*)\n)?Registrant Email:(?P.*)", # Public Interest Registry (.org), nic.pw, No-IP.com "Registrant ID:(?P.+)\nRegistrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Address1?:(?P.*)\n(?:Registrant Address2:(?P.*)\n)?(?:Registrant Address3:(?P.*)\n)?Registrant City:(?P.*)\nRegistrant State/Province:(?P.*)\nRegistrant Country/Economy:(?P.*)\nRegistrant Postal Code:(?P.*)\nRegistrant Phone:(?P.*)\n(?:Registrant Phone Ext.:(?P.*)\n)?(?:Registrant FAX:(?P.*)\n)?(?:Registrant FAX Ext.:(?P.*)\n)?Registrant E-mail:(?P.*)", # .ME, DotAsia "Registrant ID:\s*(?P.+)\nRegistrant Name:\s*(?P.+)\nRegistrant Organization:\s*(?P.*)\nRegistrant Address1:\s*(?P.+)\nRegistrant Address2:\s*(?P.*)\nRegistrant City:\s*(?P.+)\nRegistrant State/Province:\s*(?P.+)\nRegistrant Postal Code:\s*(?P.+)\nRegistrant Country:\s*(?P.+)\nRegistrant Country Code:\s*(?P.+)\nRegistrant Phone Number:\s*(?P.+)\nRegistrant Email:\s*(?P.+)\n", # .CO Internet "Registrant Contact: (?P.+)\nRegistrant Organization: (?P.+)\nRegistrant Name: (?P.+)\nRegistrant Street: (?P.+)\nRegistrant City: (?P.+)\nRegistrant Postal Code: (?P.+)\nRegistrant State: (?P.+)\nRegistrant Country: (?P.+)\nRegistrant Phone: (?P.*)\nRegistrant Phone Ext: (?P.*)\nRegistrant Fax: (?P.*)\nRegistrant Fax Ext: (?P.*)\nRegistrant Email: (?P.*)\n", # Key-Systems GmbH "(?:Registrant ID:[ ]*(?P.*)\n)?Registrant Name:[ ]*(?P.*)\n(?:Registrant Organization:[ ]*(?P.*)\n)?Registrant Street:[ ]*(?P.+)\n(?:Registrant Street:[ ]*(?P.+)\n)?(?:Registrant Street:[ 
]*(?P.+)\n)?Registrant City:[ ]*(?P.+)\nRegistrant State(?:\/Province)?:[ ]*(?P.*)\nRegistrant Postal Code:[ ]*(?P.+)\nRegistrant Country:[ ]*(?P.+)\n(?:Registrant Phone:[ ]*(?P.*)\n)?(?:Registrant Phone Ext:[ ]*(?P.*)\n)?(?:Registrant Fax:[ ]*(?P.*)\n)?(?:Registrant Fax Ext:[ ]*(?P.*)\n)?(?:Registrant Email:[ ]*(?P.+)\n)?", # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps "Registrant\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? (?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", # internet.bs " Registrant Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. (?P.*)", # Whois.com "owner-id:[ ]*(?P.*)\n(?:owner-organization:[ ]*(?P.*)\n)?owner-name:[ ]*(?P.*)\nowner-street:[ ]*(?P.*)\nowner-city:[ ]*(?P.*)\nowner-zip:[ ]*(?P.*)\nowner-country:[ ]*(?P.*)\n(?:owner-phone:[ ]*(?P.*)\n)?(?:owner-fax:[ ]*(?P.*)\n)?owner-email:[ ]*(?P.*)", # InterNetworX "Registrant:\n registrant_org: (?P.*)\n registrant_name: (?P.*)\n registrant_email: (?P.*)\n registrant_address: (?P
.*)\n registrant_city: (?P.*)\n registrant_state: (?P.*)\n registrant_zip: (?P.*)\n registrant_country: (?P.*)\n registrant_phone: (?P.*)", # Bellnames "Holder of domain name:\n(?P[\S\s]+)\n(?P.+)\n(?P[A-Z0-9-]+)\s+(?P.+)\n(?P.+)\nContractual Language", # nic.ch "\n\n(?:Owner)?\s+: (?P.*)\n(?:\s+: (?P.*)\n)?\s+: (?P.*)\n\s+: (?P.*)\n\s+: (?P.*)\n\s+: (?P.*)\n", # nic.io "Contact Information:\n\[Name\]\s*(?P.*)\n\[Email\]\s*(?P.*)\n\[Web Page\]\s*(?P.*)\n\[Postal code\]\s*(?P.*)\n\[Postal Address\]\s*(?P.*)\n(?:\s+(?P.*)\n)?(?:\s+(?P.*)\n)?\[Phone\]\s*(?P.*)\n\[Fax\]\s*(?P.*)\n", # jprs.jp "g\. \[Organization\] (?P.+)\n", # .co.jp registrations at jprs.jp "Registrant ID:(?P.*)\nRegistrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Address1:(?P.*)\n(?:Registrant Address2:(?P.*)\n)?(?:Registrant Address3:(?P.*)\n)?Registrant City:(?P.*)\n(?:Registrant State/Province:(?P.*)\n)?Registrant Postal Code:(?P.*)\nRegistrant Country:(?P.*)\nRegistrant Country Code:.*\nRegistrant Phone Number:(?P.*)\n(?:Registrant Facsimile Number:(?P.*)\n)?Registrant Email:(?P.*)", # .US, .biz (NeuStar) "Registrant\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", # nic.it " Organisation Name[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n(?: Organisation Address[.]* (?P.*)\n)? Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)", # Melbourne IT (what a horrid format...) 
"Registrant:[ ]*(?P.+)\n[\s\S]*Eligibility Name:[ ]*(?P.+)\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n", # .au business "Eligibility Type:[ ]*Citizen\/Resident\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n[\s\S]*Registrant Contact Name:[ ]*(?P.+)\n", # .au individual "Registrant:[ ]*(?P.+)\n[\s\S]*Eligibility Type:[ ]*(Higher Education Institution|Company|Incorporated Association|Other)\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n[\s\S]*Registrant Contact Name:[ ]*(?P.+)\n", # .au educational, company, 'incorporated association' (non-profit?), other (spotted for linux.conf.au, unsure if also for others) " Registrant:\n (?P.+)\n\n Registrant type:\n .*\n\n Registrant's address:\n The registrant .* opted to have", # Nominet (.uk) with hidden address " Registrant:\n (?P.+)\n\n[\s\S]* Registrant type:\n .*\n\n Registrant's address:\n (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)??)?? (?P[^0-9\n]+)\n(?: (?P.+)\n)? (?P.+)\n (?P.+)\n\n", # Nominet (.uk) with visible address "Domain Owner:\n\t(?P.+)\n\n[\s\S]*?(?:Registrant Contact:\n\t(?P.+))?\n\nRegistrant(?:'s)? (?:a|A)ddress:(?:\n\t(?P.+)\n(?:\t(?P.+)\n)?(?:\t(?P.+)\n)?\t(?P.+)\n\t(?P.+))?\n\t(?P.+)(?:\n\t(?P.+) \(Phone\)\n\t(?P.+) \(FAX\)\n\t(?P.+))?\n\n", # .ac.uk - what a mess... 
"Registrant ID: (?P.+)\nRegistrant: (?P.+)\nRegistrant Contact Email: (?P.+)", # .cn (CNNIC) "Registrant contact:\n (?P.+)\n (?P.*)\n (?P.+), (?P.+) (?P.+) (?P.+)\n\n", # Fabulous.com "registrant-name:\s*(?P.+)\nregistrant-type:\s*(?P.+)\nregistrant-address:\s*(?P.+)\nregistrant-postcode:\s*(?P.+)\nregistrant-city:\s*(?P.+)\nregistrant-country:\s*(?P.+)\n(?:registrant-phone:\s*(?P.+)\n)?(?:registrant-email:\s*(?P.+)\n)?", # Hetzner "Registrant Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication "Contact Information : For Customer # [0-9]+[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication alternative (private WHOIS) format? "Registrant:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) " Registrant:\n (?P.+)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)", # .am "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P[^.,]+), (?P.+), (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 1 "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 2 "Domain Holder: (?P.+)\n(?P.+)\n(?:(?P.+)\n)?(?:(?P.+)\n)?.+?, (?P.+)\n(?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 3 "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+),? 
(?P[A-Z]{2,3})(?: [A-Z0-9]+)?\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 4 " Registrant:\n (?P.+)\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.+), (?P[^,\n]*)\n (?P.+)\n", # .com.tw (Western registrars) "Registrant:\n(?P.+)\n(?P.+)\n(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?))?)?)?)?)?)?,(?P.+),(?P.+)\n\n Contact:\n (?P.+) (?P.+)\n TEL: (?P.+?)(?:(?:#|ext.?)(?P.+))?\n FAX: (?P.+)(?:(?:#|ext.?)(?P.+))?\n", # .com.tw (TWNIC/SEEDNET, Taiwanese companies only?) "Registrant Contact Information:\n\nCompany English Name \(It should be the same as the registered/corporation name on your Business Register Certificate or relevant documents\):(?P.+)\nCompany Chinese name:(?P.+)\nAddress: (?P.+)\nCountry: (?P.+)\nEmail: (?P.+)\n", # HKDNR (.hk) "owner:\s+(?P.+)", # .br "person:\s+(?P.+)", # nic.ru (person) "org:\s+(?P.+)", # nic.ru (organization) ] tech_contact_regexes = [ " Technical Contact:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", # Corporate Domains, Inc. "Technical Contact:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", # OVH "(?:Tech ID:(?P.+)\n)?Tech Name:(?P.*)\n(:?Tech Organization:(?P.*)\n)?Tech Street1?:(?P.*)\n(?:Tech Street2:(?P.*)\n)?(?:Tech Street3:(?P.*)\n)?Tech City:(?P.*)\nTech State/Province:(?P.*)\nTech Postal Code:(?P.*)\nTech Country:(?P.*)\nTech Phone:(?P.*)\n(?:Tech Phone Ext.:(?P.*)\n)?(?:Tech FAX:(?P.*)\n)?(?:Tech FAX Ext.:(?P.*)\n)?Tech Email:(?P.*)", # Public Interest Registry (.org), nic.pw, No-IP.com "Tech(?:nical)? ID:(?P.+)\nTech(?:nical)? Name:(?P.*)\n(?:Tech(?:nical)? Organization:(?P.*)\n)?Tech(?:nical)? Address1?:(?P.*)\n(?:Tech(?:nical)? Address2:(?P.*)\n)?(?:Tech(?:nical)? Address3:(?P.*)\n)?Tech(?:nical)? City:(?P.*)\nTech(?:nical)? State/Province:(?P.*)\nTech(?:nical)? Country/Economy:(?P.*)\nTech(?:nical)? Postal Code:(?P.*)\nTech(?:nical)? Phone:(?P.*)\n(?:Tech(?:nical)? 
Phone Ext.:(?P.*)\n)?(?:Tech(?:nical)? FAX:(?P.*)\n)?(?:Tech(?:nical)? FAX Ext.:(?P.*)\n)?Tech(?:nical)? E-mail:(?P.*)", # .ME, DotAsia "Technical Contact ID:\s*(?P.+)\nTechnical Contact Name:\s*(?P.+)\nTechnical Contact Organization:\s*(?P.*)\nTechnical Contact Address1:\s*(?P.+)\nTechnical Contact Address2:\s*(?P.*)\nTechnical Contact City:\s*(?P.+)\nTechnical Contact State/Province:\s*(?P.+)\nTechnical Contact Postal Code:\s*(?P.+)\nTechnical Contact Country:\s*(?P.+)\nTechnical Contact Country Code:\s*(?P.+)\nTechnical Contact Phone Number:\s*(?P.+)\nTechnical Contact Email:\s*(?P.+)\n", # .CO Internet "Tech Contact: (?P.+)\nTech Organization: (?P.+)\nTech Name: (?P.+)\nTech Street: (?P.+)\nTech City: (?P.+)\nTech Postal Code: (?P.+)\nTech State: (?P.+)\nTech Country: (?P.+)\nTech Phone: (?P.*)\nTech Phone Ext: (?P.*)\nTech Fax: (?P.*)\nTech Fax Ext: (?P.*)\nTech Email: (?P.*)\n", # Key-Systems GmbH "(?:Tech ID:[ ]*(?P.*)\n)?Tech[ ]*Name:[ ]*(?P.*)\n(?:Tech[ ]*Organization:[ ]*(?P.*)\n)?Tech[ ]*Street:[ ]*(?P.+)\n(?:Tech[ ]*Street:[ ]*(?P.+)\n)?(?:Tech[ ]*Street:[ ]*(?P.+)\n)?Tech[ ]*City:[ ]*(?P.+)\nTech[ ]*State(?:\/Province)?:[ ]*(?P.*)\nTech[ ]*Postal[ ]*Code:[ ]*(?P.+)\nTech[ ]*Country:[ ]*(?P.+)\n(?:Tech[ ]*Phone:[ ]*(?P.*)\n)?(?:Tech[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Tech[ ]*Fax:[ ]*(?P.*)\n)?(?:Tech[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Tech[ ]*Email:[ ]*(?P.+)\n)?", # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps "Technical Contact\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? (?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", # internet.bs " Technical Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. 
(?P.*)", # Whois.com "tech-id:[ ]*(?P.*)\n(?:tech-organization:[ ]*(?P.*)\n)?tech-name:[ ]*(?P.*)\ntech-street:[ ]*(?P.*)\ntech-city:[ ]*(?P.*)\ntech-zip:[ ]*(?P.*)\ntech-country:[ ]*(?P.*)\n(?:tech-phone:[ ]*(?P.*)\n)?(?:tech-fax:[ ]*(?P.*)\n)?tech-email:[ ]*(?P.*)", # InterNetworX "Technical Contact:\n tech_org: (?P.*)\n tech_name: (?P.*)\n tech_email: (?P.*)\n tech_address: (?P
.*)\n tech_city: (?P.*)\n tech_state: (?P.*)\n tech_zip: (?P.*)\n tech_country: (?P.*)\n tech_phone: (?P.*)", # Bellnames "Technical contact:\n(?P[\S\s]+)\n(?P.+)\n(?P[A-Z0-9-]+)\s+(?P.+)\n(?P.+)\n\n", # nic.ch "Tech Contact ID:[ ]*(?P.+)\nTech Contact Name:[ ]*(?P.+)", # .au "Technical Contact ID:(?P.*)\nTechnical Contact Name:(?P.*)\n(?:Technical Contact Organization:(?P.*)\n)?Technical Contact Address1:(?P.*)\n(?:Technical Contact Address2:(?P.*)\n)?(?:Technical Contact Address3:(?P.*)\n)?Technical Contact City:(?P.*)\n(?:Technical Contact State/Province:(?P.*)\n)?Technical Contact Postal Code:(?P.*)\nTechnical Contact Country:(?P.*)\nTechnical Contact Country Code:.*\nTechnical Contact Phone Number:(?P.*)\n(?:Technical Contact Facsimile Number:(?P.*)\n)?Technical Contact Email:(?P.*)", # .US, .biz (NeuStar) "Technical Contacts\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", # nic.it // NOTE: Why does this say 'Contacts'? Can it have multiple? "Tech Name[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n(?: Tech Address[.]* (?P.*)\n)? Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Email[.]* (?P.*)\n Tech Phone[.]* (?P.*)\n Tech Fax[.]* (?P.*)", # Melbourne IT "Technical contact:\n(?: (?P.+)\n)? 
(?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", # Fabulous.com "tech-c-name:\s*(?P.+)\ntech-c-type:\s*(?P.+)\ntech-c-address:\s*(?P.+)\ntech-c-postcode:\s*(?P.+)\ntech-c-city:\s*(?P.+)\ntech-c-country:\s*(?P.+)\n(?:tech-c-phone:\s*(?P.+)\n)?(?:tech-c-email:\s*(?P.+)\n)?", # Hetzner "Admin Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication " Technical contact:\n (?P.+)\n (?P.*)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)\n (?P.+)\n (?P.*)\n (?P.*)", # .am "Technical:\n\s*Name:\s*(?P.*)\n\s*Organisation:\s*(?P.*)\n\s*Language:.*\n\s*Phone:\s*(?P.*)\n\s*Fax:\s*(?P.*)\n\s*Email:\s*(?P.*)\n", # EURid "\[Zone-C\]\nType: (?P.+)\nName: (?P.+)\n(Organisation: (?P.+)\n){0,1}(Address: (?P.+)\n){1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}PostalCode: (?P.+)\nCity: (?P.+)\nCountryCode: (?P[A-Za-z]{2})\nPhone: (?P.+)\nFax: (?P.+)\nEmail: (?P.+)\n(Remarks: (?P.+)\n){0,1}Changed: (?P.+)", # DeNIC "Technical Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+),? 
(?P[A-Z]{2,3})(?: [A-Z0-9]+)?\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 1 "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+), (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 2 "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 3 "Tech Contact: (?P.+)\n(?P.+) (?P[^\s]+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 4 "Tech Contact: (?P.+)\n(?P.+)\n(?P.+)\n(?P.+) (?P[^\s]+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 5 "Tech Contact: (?P.+)\n(?P.+)\n(?P.+)\n(?P.+)\n(?:(?P.+)\n)?(?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 6 " Technical Contact:\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n", # .com.tw (Western registrars) "Technical Contact Information:\n\n(?:Given name: (?P.+)\n)?(?:Family name: (?P.+)\n)?(?:Company name: (?P.+)\n)?Address: (?P.+)\nCountry: (?P.+)\nPhone: (?P.*)\nFax: (?P.*)\nEmail: (?P.+)\n(?:Account Name: (?P.+)\n)?", # HKDNR (.hk) ] admin_contact_regexes = [ " Administrative Contact:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", # Corporate Domains, Inc. "Administrative Contact:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", # OVH "(?:Admin ID:(?P.+)\n)?Admin Name:(?P.*)\n(?:Admin Organization:(?P.*)\n)?Admin Street1?:(?P.*)\n(?:Admin Street2:(?P.*)\n)?(?:Admin Street3:(?P.*)\n)?Admin City:(?P.*)\nAdmin State/Province:(?P.*)\nAdmin Postal Code:(?P.*)\nAdmin Country:(?P.*)\nAdmin Phone:(?P.*)\n(?:Admin Phone Ext.:(?P.*)\n)?(?:Admin FAX:(?P.*)\n)?(?:Admin FAX Ext.:(?P.*)\n)?Admin Email:(?P.*)", # Public Interest Registry (.org), nic.pw, No-IP.com "Admin(?:istrative)? ID:(?P.+)\nAdmin(?:istrative)? Name:(?P.*)\n(?:Admin(?:istrative)? Organization:(?P.*)\n)?Admin(?:istrative)? Address1?:(?P.*)\n(?:Admin(?:istrative)? 
Address2:(?P.*)\n)?(?:Admin(?:istrative)? Address3:(?P.*)\n)?Admin(?:istrative)? City:(?P.*)\nAdmin(?:istrative)? State/Province:(?P.*)\nAdmin(?:istrative)? Country/Economy:(?P.*)\nAdmin(?:istrative)? Postal Code:(?P.*)\nAdmin(?:istrative)? Phone:(?P.*)\n(?:Admin(?:istrative)? Phone Ext.:(?P.*)\n)?(?:Admin(?:istrative)? FAX:(?P.*)\n)?(?:Admin(?:istrative)? FAX Ext.:(?P.*)\n)?Admin(?:istrative)? E-mail:(?P.*)", # .ME, DotAsia "Administrative Contact ID:\s*(?P.+)\nAdministrative Contact Name:\s*(?P.+)\nAdministrative Contact Organization:\s*(?P.*)\nAdministrative Contact Address1:\s*(?P.+)\nAdministrative Contact Address2:\s*(?P.*)\nAdministrative Contact City:\s*(?P.+)\nAdministrative Contact State/Province:\s*(?P.+)\nAdministrative Contact Postal Code:\s*(?P.+)\nAdministrative Contact Country:\s*(?P.+)\nAdministrative Contact Country Code:\s*(?P.+)\nAdministrative Contact Phone Number:\s*(?P.+)\nAdministrative Contact Email:\s*(?P.+)\n", # .CO Internet "Admin Contact: (?P.+)\nAdmin Organization: (?P.+)\nAdmin Name: (?P.+)\nAdmin Street: (?P.+)\nAdmin City: (?P.+)\nAdmin State: (?P.+)\nAdmin Postal Code: (?P.+)\nAdmin Country: (?P.+)\nAdmin Phone: (?P.*)\nAdmin Phone Ext: (?P.*)\nAdmin Fax: (?P.*)\nAdmin Fax Ext: (?P.*)\nAdmin Email: (?P.*)\n", # Key-Systems GmbH "(?:Admin ID:[ ]*(?P.*)\n)?Admin[ ]*Name:[ ]*(?P.*)\n(?:Admin[ ]*Organization:[ ]*(?P.*)\n)?Admin[ ]*Street:[ ]*(?P.+)\n(?:Admin[ ]*Street:[ ]*(?P.+)\n)?(?:Admin[ ]*Street:[ ]*(?P.+)\n)?Admin[ ]*City:[ ]*(?P.+)\nAdmin[ ]*State(?:\/Province)?:[ ]*(?P.*)\nAdmin[ ]*Postal[ ]*Code:[ ]*(?P.+)\nAdmin[ ]*Country:[ ]*(?P.+)\n(?:Admin[ ]*Phone:[ ]*(?P.*)\n)?(?:Admin[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Admin[ ]*Fax:[ ]*(?P.*)\n)?(?:Admin[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Admin[ ]*Email:[ ]*(?P.+)\n)?", # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps "Administrative Contact\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? 
(?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", # internet.bs " Administrative Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. (?P.*)", # Whois.com "admin-id:[ ]*(?P.*)\n(?:admin-organization:[ ]*(?P.*)\n)?admin-name:[ ]*(?P.*)\nadmin-street:[ ]*(?P.*)\nadmin-city:[ ]*(?P.*)\nadmin-zip:[ ]*(?P.*)\nadmin-country:[ ]*(?P.*)\n(?:admin-phone:[ ]*(?P.*)\n)?(?:admin-fax:[ ]*(?P.*)\n)?admin-email:[ ]*(?P.*)", # InterNetworX "Administrative Contact:\n admin_org: (?P.*)\n admin_name: (?P.*)\n admin_email: (?P.*)\n admin_address: (?P
.*)\n admin_city: (?P.*)\n admin_state: (?P.*)\n admin_zip: (?P.*)\n admin_country: (?P.*)\n admin_phone: (?P.*)", # Bellnames "Administrative Contact ID:(?P.*)\nAdministrative Contact Name:(?P.*)\n(?:Administrative Contact Organization:(?P.*)\n)?Administrative Contact Address1:(?P.*)\n(?:Administrative Contact Address2:(?P.*)\n)?(?:Administrative Contact Address3:(?P.*)\n)?Administrative Contact City:(?P.*)\n(?:Administrative Contact State/Province:(?P.*)\n)?Administrative Contact Postal Code:(?P.*)\nAdministrative Contact Country:(?P.*)\nAdministrative Contact Country Code:.*\nAdministrative Contact Phone Number:(?P.*)\n(?:Administrative Contact Facsimile Number:(?P.*)\n)?Administrative Contact Email:(?P.*)", # .US, .biz (NeuStar) "Admin Contact\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", # nic.it "Admin Name[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n(?: Admin Address[.]* (?P.*)\n)? Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Email[.]* (?P.*)\n Admin Phone[.]* (?P.*)\n Admin Fax[.]* (?P.*)", # Melbourne IT "Administrative contact:\n(?: (?P.+)\n)? 
(?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", # Fabulous.com "admin-c-name:\s*(?P.+)\nadmin-c-type:\s*(?P.+)\nadmin-c-address:\s*(?P.+)\nadmin-c-postcode:\s*(?P.+)\nadmin-c-city:\s*(?P.+)\nadmin-c-country:\s*(?P.+)\n(?:admin-c-phone:\s*(?P.+)\n)?(?:admin-c-email:\s*(?P.+)\n)?", # Hetzner "Tech Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication " Administrative contact:\n (?P.+)\n (?P.*)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)\n (?P.+)\n (?P.*)\n (?P.*)", # .am "Administrative Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) "\[Tech-C\]\nType: (?P.+)\nName: (?P.+)\n(Organisation: (?P.+)\n){0,1}(Address: (?P.+)\n){1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}PostalCode: (?P.+)\nCity: (?P.+)\nCountryCode: (?P[A-Za-z]{2})\nPhone: (?P.+)\nFax: (?P.+)\nEmail: (?P.+)\n(Remarks: (?P.+)\n){0,1}Changed: (?P.+)", # DeNIC " Administrative Contact:\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n", # .com.tw (Western registrars) "Administrative Contact Information:\n\n(?:Given name: (?P.+)\n)?(?:Family name: (?P.+)\n)?(?:Company name: (?P.+)\n)?Address: (?P.+)\nCountry: (?P.+)\nPhone: (?P.*)\nFax: (?P.*)\nEmail: (?P.+)\n(?:Account Name: (?P.+)\n)?", # HKDNR (.hk) ] billing_contact_regexes = [ "(?:Billing ID:(?P.+)\n)?Billing Name:(?P.*)\nBilling Organization:(?P.*)\nBilling Street1:(?P.*)\n(?:Billing Street2:(?P.*)\n)?(?:Billing Street3:(?P.*)\n)?Billing City:(?P.*)\nBilling State/Province:(?P.*)\nBilling Postal Code:(?P.*)\nBilling Country:(?P.*)\nBilling Phone:(?P.*)\n(?:Billing Phone Ext.:(?P.*)\n)?(?:Billing FAX:(?P.*)\n)?(?:Billing FAX Ext.:(?P.*)\n)?Billing Email:(?P.*)", # nic.pw, No-IP.com "Billing ID:(?P.+)\nBilling Name:(?P.*)\n(?:Billing Organization:(?P.*)\n)?Billing Address1?:(?P.*)\n(?:Billing Address2:(?P.*)\n)?(?:Billing 
Address3:(?P.*)\n)?Billing City:(?P.*)\nBilling State/Province:(?P.*)\nBilling Country/Economy:(?P.*)\nBilling Postal Code:(?P.*)\nBilling Phone:(?P.*)\n(?:Billing Phone Ext.:(?P.*)\n)?(?:Billing FAX:(?P.*)\n)?(?:Billing FAX Ext.:(?P.*)\n)?Billing E-mail:(?P.*)", # DotAsia "Billing Contact ID:\s*(?P.+)\nBilling Contact Name:\s*(?P.+)\nBilling Contact Organization:\s*(?P.*)\nBilling Contact Address1:\s*(?P.+)\nBilling Contact Address2:\s*(?P.*)\nBilling Contact City:\s*(?P.+)\nBilling Contact State/Province:\s*(?P.+)\nBilling Contact Postal Code:\s*(?P.+)\nBilling Contact Country:\s*(?P.+)\nBilling Contact Country Code:\s*(?P.+)\nBilling Contact Phone Number:\s*(?P.+)\nBilling Contact Email:\s*(?P.+)\n", # .CO Internet "Billing Contact: (?P.+)\nBilling Organization: (?P.+)\nBilling Name: (?P.+)\nBilling Street: (?P.+)\nBilling City: (?P.+)\nBilling Postal Code: (?P.+)\nBilling State: (?P.+)\nBilling Country: (?P.+)\nBilling Phone: (?P.*)\nBilling Phone Ext: (?P.*)\nBilling Fax: (?P.*)\nBilling Fax Ext: (?P.*)\nBilling Email: (?P.*)\n", # Key-Systems GmbH "(?:Billing ID:[ ]*(?P.*)\n)?Billing[ ]*Name:[ ]*(?P.*)\n(?:Billing[ ]*Organization:[ ]*(?P.*)\n)?Billing[ ]*Street:[ ]*(?P.+)\n(?:Billing[ ]*Street:[ ]*(?P.+)\n)?Billing[ ]*City:[ ]*(?P.+)\nBilling[ ]*State\/Province:[ ]*(?P.+)\nBilling[ ]*Postal[ ]*Code:[ ]*(?P.+)\nBilling[ ]*Country:[ ]*(?P.+)\n(?:Billing[ ]*Phone:[ ]*(?P.*)\n)?(?:Billing[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Billing[ ]*Fax:[ ]*(?P.*)\n)?(?:Billing[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Billing[ ]*Email:[ ]*(?P.+)\n)?", # Musedoma (.museum) "Billing Contact:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", # OVH " Billing Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. 
(?P.*)", # Whois.com "billing-id:[ ]*(?P.*)\n(?:billing-organization:[ ]*(?P.*)\n)?billing-name:[ ]*(?P.*)\nbilling-street:[ ]*(?P.*)\nbilling-city:[ ]*(?P.*)\nbilling-zip:[ ]*(?P.*)\nbilling-country:[ ]*(?P.*)\n(?:billing-phone:[ ]*(?P.*)\n)?(?:billing-fax:[ ]*(?P.*)\n)?billing-email:[ ]*(?P.*)", # InterNetworX "Billing Contact:\n bill_org: (?P.*)\n bill_name: (?P.*)\n bill_email: (?P.*)\n bill_address: (?P
.*)\n bill_city: (?P.*)\n bill_state: (?P.*)\n bill_zip: (?P.*)\n bill_country: (?P.*)\n bill_phone: (?P.*)", # Bellnames "Billing Contact ID:(?P.*)\nBilling Contact Name:(?P.*)\n(?:Billing Contact Organization:(?P.*)\n)?Billing Contact Address1:(?P.*)\n(?:Billing Contact Address2:(?P.*)\n)?(?:Billing Contact Address3:(?P.*)\n)?Billing Contact City:(?P.*)\n(?:Billing Contact State/Province:(?P.*)\n)?Billing Contact Postal Code:(?P.*)\nBilling Contact Country:(?P.*)\nBilling Contact Country Code:.*\nBilling Contact Phone Number:(?P.*)\n(?:Billing Contact Facsimile Number:(?P.*)\n)?Billing Contact Email:(?P.*)", # .US, .biz (NeuStar) "Billing contact:\n(?: (?P.+)\n)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", # Fabulous.com "Billing Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication "Billing Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) ] # Some registries use NIC handle references instead of directly listing contacts... nic_contact_references = { "registrant": [ "registrant:\s*(?P.+)", # nic.at "owner-contact:\s*(?P.+)", # LCN.com "holder-c:\s*(?P.+)", # AFNIC "holder:\s*(?P.+)", # iis.se (they apparently want to be difficult, and won't give you contact info for the handle over their WHOIS service) ], "tech": [ "tech-c:\s*(?P.+)", # nic.at, AFNIC, iis.se "technical-contact:\s*(?P.+)", # LCN.com "n\. \[Technical Contact\] (?P.+)\n", #.co.jp ], "admin": [ "admin-c:\s*(?P.+)", # nic.at, AFNIC, iis.se "admin-contact:\s*(?P.+)", # LCN.com "m\. \[Administrative Contact\] (?P.+)\n", # .co.jp ], "billing": [ "billing-c:\s*(?P.+)", # iis.se "billing-contact:\s*(?P.+)", # LCN.com ] } # Why do the below? 
# The below is meant to handle an edge case (issue #2) where a partial match followed
# by a failure, for a regex containing the \s*.+ pattern, would send the regex module on a wild goose hunt for
# matching positions. The workaround is to use \S.* instead of .+, but in the interest of keeping the regexes
# consistent and compact, it's more practical to do this (predictable) conversion at runtime.
# FIXME: This breaks on NIC contact regex for nic.at. Why?
registrant_regexes = [preprocess_regex(regex) for regex in registrant_regexes]
tech_contact_regexes = [preprocess_regex(regex) for regex in tech_contact_regexes]
admin_contact_regexes = [preprocess_regex(regex) for regex in admin_contact_regexes]
billing_contact_regexes = [preprocess_regex(regex) for regex in billing_contact_regexes]

# NOTE(review): the first three patterns below contain "(?P.+)" groups with no "<name>"
# part, which the re module rejects at compile time. The group names appear to have been
# lost from this copy of the file - restore them from the canonical source before use.
nic_contact_regexes = [
    "personname:\s*(?P.+)\norganization:\s*(?P.+)\nstreet address:\s*(?P.+)\npostal code:\s*(?P.+)\ncity:\s*(?P.+)\ncountry:\s*(?P.+)\n(?:phone:\s*(?P.+)\n)?(?:fax-no:\s*(?P.+)\n)?(?:e-mail:\s*(?P.+)\n)?nic-hdl:\s*(?P.+)\nchanged:\s*(?P.+)", # nic.at
    "contact-handle:[ ]*(?P.+)\ncontact:[ ]*(?P.+)\n(?:organisation:[ ]*(?P.+)\n)?address:[ ]*(?P.+)\n(?:address:[ ]*(?P.+)\n)?(?:address:[ ]*(?P.+)\n)?(?:address:[ ]*(?P.+)\n)?address:[ ]*(?P.+)\naddress:[ ]*(?P.+)\naddress:[ ]*(?P.+)\naddress:[ ]*(?P.+)\n(?:phone:[ ]*(?P.+)\n)?(?:fax:[ ]*(?P.+)\n)?(?:email:[ ]*(?P.+)\n)?", # LCN.com
    "Contact Information:\na\. \[JPNIC Handle\] (?P.+)\nc\. \[Last, First\] (?P.+), (?P.+)\nd\. \[E-Mail\] (?P.+)\ng\. \[Organization\] (?P.+)\nl\. \[Division\] (?P.+)\nn\. \[Title\] (?P.+)\no\. \[TEL\] (?P<phone>.+)\np\. \[FAX\] (?P<fax>.+)\ny\. \[Reply Mail\] .*\n\[Last Update\] (?P<changedate>.+) \(JST\)\n", # JPRS .co.jp contact handle lookup
    "person:\s*(?P<name>.+)\nnic-hdl:\s*(?P<handle>.+)\n", # .ie
    "nic-hdl:\s+(?P<handle>.+)\nperson:\s+(?P<name>.+)\n(?:e-mail:\s+(?P<email>.+)\n)?(?:address:\s+(?P<street1>.+?)(?:,+ (?P<street2>.+?)(?:,+ (?P<street3>.+?)(?:,+ (?P<street4>.+?)(?:,+ (?P<street5>.+?)(?:,+ (?P<street6>.+?)(?:,+ (?P<street7>.+?))?)?)?)?)?)?, (?P<city>.+), (?P<state>.+), (?P<country>.+)\n)?(?:phone:\s+(?P<phone>.+)\n)?(?:fax-no:\s+(?P<fax>.+)\n)?", # nic.ir, individual - this is a nasty one.
    "nic-hdl:\s+(?P<handle>.+)\norg:\s+(?P<organization>.+)\n(?:e-mail:\s+(?P<email>.+)\n)?(?:address:\s+(?P<street1>.+?)(?:,+ (?P<street2>.+?)(?:,+ (?P<street3>.+?)(?:,+ (?P<street4>.+?)(?:,+ (?P<street5>.+?)(?:,+ (?P<street6>.+?)(?:,+ (?P<street7>.+?))?)?)?)?)?)?, (?P<city>.+), (?P<state>.+), (?P<country>.+)\n)?(?:phone:\s+(?P<phone>.+)\n)?(?:fax-no:\s+(?P<fax>.+)\n)?", # nic.ir, organization
    "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\naddress:\s*(?P<street2>.+)\naddress:\s*(?P<street3>.+)\naddress:\s*(?P<country>.+)\n)?(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", # AFNIC madness without country field
    "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\n)?(?:address:\s*(?P<street2>.+)\n)?(?:address:\s*(?P<street3>.+)\n)?(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", # AFNIC madness any country -at all-
    "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\n)?(?:address:\s*(?P<street2>.+)\n)?(?:address:\s*(?P<street3>.+)\n)?(?:address:\s*(?P<street4>.+)\n)?country:\s*(?P<country>.+)\n(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", # AFNIC madness with country field
]

# Patterns for company-form suffixes; normalize_data() uses them to tell organization
# names apart from personal names and street lines.
organization_regexes = (
    r"\sltd\.?($|\s)",
    r"\sco\.?($|\s)",
    r"\scorp\.?($|\s)",
    r"\sinc\.?($|\s)",
    r"\ss\.?p\.?a\.?($|\s)",
    r"\ss\.?(c\.?)?r\.?l\.?($|\s)",
    r"\ss\.?a\.?s\.?($|\s)",
    r"\sa\.?g\.?($|\s)",
    r"\sn\.?v\.?($|\s)",
    r"\sb\.?v\.?($|\s)",
    r"\sp\.?t\.?y\.?($|\s)",
    r"\sp\.?l\.?c\.?($|\s)",
    r"\sv\.?o\.?f\.?($|\s)",
    r"\sb\.?v\.?b\.?a\.?($|\s)",
    r"\sg\.?m\.?b\.?h\.?($|\s)",
    r"\ss\.?a\.?r\.?l\.?($|\s)",
)

# Compile every pattern table once at import time.
grammar["_data"]["id"] = precompile_regexes(grammar["_data"]["id"], re.IGNORECASE)
grammar["_data"]["status"] = precompile_regexes(grammar["_data"]["status"], re.IGNORECASE)
grammar["_data"]["creation_date"] = precompile_regexes(grammar["_data"]["creation_date"], re.IGNORECASE)
grammar["_data"]["expiration_date"] = precompile_regexes(grammar["_data"]["expiration_date"], re.IGNORECASE)
grammar["_data"]["updated_date"] = precompile_regexes(grammar["_data"]["updated_date"], re.IGNORECASE)
grammar["_data"]["registrar"] = precompile_regexes(grammar["_data"]["registrar"], re.IGNORECASE)
grammar["_data"]["whois_server"] = precompile_regexes(grammar["_data"]["whois_server"], re.IGNORECASE)
grammar["_data"]["nameservers"] = precompile_regexes(grammar["_data"]["nameservers"], re.IGNORECASE)
grammar["_data"]["emails"] = precompile_regexes(grammar["_data"]["emails"], re.IGNORECASE)

grammar["_dateformats"] = precompile_regexes(grammar["_dateformats"], re.IGNORECASE)

registrant_regexes = precompile_regexes(registrant_regexes)
tech_contact_regexes = precompile_regexes(tech_contact_regexes)
billing_contact_regexes = precompile_regexes(billing_contact_regexes)
admin_contact_regexes = precompile_regexes(admin_contact_regexes)
nic_contact_regexes = precompile_regexes(nic_contact_regexes)
organization_regexes = precompile_regexes(organization_regexes, re.IGNORECASE)

nic_contact_references["registrant"] = precompile_regexes(nic_contact_references["registrant"])
nic_contact_references["tech"] = precompile_regexes(nic_contact_references["tech"])
nic_contact_references["admin"] = precompile_regexes(nic_contact_references["admin"])
nic_contact_references["billing"] = precompile_regexes(nic_contact_references["billing"])

if sys.version_info < (3, 0):
    def is_string(data):
        """Test for string with support for python 2."""
        return isinstance(data, basestring)
else:
    def is_string(data):
        """Test for string with support for python 3."""
        return isinstance(data, str)


def _override_from_match(data, key, pattern, segment):
    """Replace ``data[key]`` with the first group of *pattern* if it matches *segment*."""
    found = re.search(pattern, segment)
    if found is not None:
        data[key] = [found.group(1).strip()]


def _prepend_from_match(data, key, pattern, segment):
    """Insert the first group of *pattern* at the front of ``data[key]`` if it matches."""
    found = re.search(pattern, segment)
    if found is not None:
        # setdefault: the key may not have been produced by the grammar rules yet.
        data.setdefault(key, []).insert(0, found.group(1).strip())


def _collect_nameservers(data, segment, section_pattern, line_pattern, skip_aliases=False):
    """Append the first token of every listed line of a nameserver section.

    *section_pattern* must capture the section body in group 1 and *line_pattern*
    must capture one entry per line. With *skip_aliases*, entries that start with
    "[" or end with "]" are ignored.
    """
    section = re.search(section_pattern, segment)
    if section is None:
        return
    for entry in re.findall(line_pattern, section.group(1)):
        tokens = entry.split()
        if not tokens:
            continue  # Whitespace-only line, nothing to record
        host = tokens[0]
        if skip_aliases and (host.startswith("[") or host.endswith("]")):
            continue  # Prevent nameserver aliases from being picked up.
        data.setdefault("nameservers", []).append(host.strip())


def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_server=""):
    """Parse raw WHOIS response segments into a dictionary of fields.

    raw_data -- iterable of response strings; carriage returns are stripped first.
    normalized -- None or [] to skip normalization; otherwise passed to
        normalize_data() (True, or a collection of keys to normalize).
    never_query_handles, handle_server -- forwarded to parse_registrants().

    Returns a dict holding the fields that were found, plus "contacts" (the result
    of parse_registrants()) and "raw" (the cleaned input segments). Fields that
    ended up empty or None are left out.
    """
    if normalized is None:
        normalized = []

    data = {}

    raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil

    for segment in raw_data:
        for rule_key, rule_regexes in grammar['_data'].items():
            if rule_key in data:
                continue  # Already populated before this segment's rules were tried
            for line in segment.splitlines():
                for regex in rule_regexes:
                    result = re.search(regex, line)
                    if result is not None:
                        val = result.group("val").strip()
                        if val != "":
                            data.setdefault(rule_key, []).append(val)

        # Whois.com is a bit special... Fabulous.com also seems to use this format. As do some others.
        section = re.search("^\s?Name\s?[Ss]ervers:?\s*\n((?:\s*.+\n)+?\s?)\n", segment, re.MULTILINE)
        if section is not None:
            for entry in re.findall("[ ]*(.+)\n", section.group(1)):
                if entry.strip() != "" and not re.match("^[a-zA-Z]+:", entry):
                    data.setdefault("nameservers", []).append(entry.strip())

        # Nominet also needs some special attention
        _override_from_match(data, "registrar", " Registrar:\n (.+)\n", segment)
        _override_from_match(data, "status", " Registration status:\n (.+)\n", segment)
        _collect_nameservers(data, segment, " Name servers:\n([\s\S]*?\n)\n", " (.+)\n")

        # janet (.ac.uk) is kinda like Nominet, but also kinda not
        _override_from_match(data, "registrar", "Registered By:\n\t(.+)\n", segment)
        _override_from_match(data, "creation_date", "Entry created:\n\t(.+)\n", segment)
        _override_from_match(data, "expiration_date", "Renewal date:\n\t(.+)\n", segment)
        _override_from_match(data, "updated_date", "Entry updated:\n\t(.+)\n", segment)
        _collect_nameservers(data, segment, "Servers:([\s\S]*?\n)\n", "\t(.+)\n")

        # .am plays the same game
        _collect_nameservers(data, segment, " DNS servers:([\s\S]*?\n)\n", " (.+)\n")

        # SIDN isn't very standard either. And EURid uses a similar format.
        _prepend_from_match(data, "registrar", "Registrar:\n\s+(?:Name:\s*)?(\S.*)", segment)
        _collect_nameservers(data, segment, "(?:Domain nameservers|Name servers):([\s\S]*?\n)\n", "\s+?(.+)\n", skip_aliases=True)

        # The .ie WHOIS server puts ambiguous status information in an unhelpful order
        _prepend_from_match(data, "status", 'ren-status:\s*(.+)', segment)

        # nic.it gives us the registrar in a multi-line format...
        _override_from_match(data, "registrar", 'Registrar\n Organization: (.+)\n', segment)

        # HKDNR (.hk) provides a weird nameserver format with too much whitespace
        _collect_nameservers(data, segment, "Name Servers Information:\n\n([\s\S]*?\n)\n", "(.+)\n")

        # ... and again for TWNIC.
        _collect_nameservers(data, segment, " Domain servers in listed order:\n([\s\S]*?\n)\n", " (.+)\n")

    data["contacts"] = parse_registrants(raw_data, never_query_handles, handle_server)

    # Parse dates
    for date_key in ('expiration_date', 'creation_date', 'updated_date'):
        if date_key in data:
            data[date_key] = parse_dates(remove_duplicates(data[date_key]))

    if 'nameservers' in data:
        data['nameservers'] = remove_suffixes(data['nameservers'])
        data['nameservers'] = remove_duplicates([ns.rstrip(".") for ns in data['nameservers']])

    for list_key in ('emails', 'registrar'):
        if list_key in data:
            data[list_key] = remove_duplicates(data[list_key])

    # Remove e-mail addresses if they are already listed for any of the contacts
    known_emails = []
    for contact in ("registrant", "tech", "admin", "billing"):
        details = data["contacts"][contact]
        if details is not None and "email" in details:
            known_emails.append(details["email"])

    if 'emails' in data:
        data['emails'] = [email for email in data["emails"] if email not in known_emails]

    # Drop fields that ended up empty (parse_dates() returns None when nothing parsed)
    for key in list(data.keys()):
        if data[key] is None or len(data[key]) == 0:
            del data[key]

    data["raw"] = raw_data

    if normalized != []:
        data = normalize_data(data, normalized)

    return data


def normalize_data(data, normalized):
    """Normalize casing and expand abbreviations in parsed WHOIS data, in place.

    data -- dict as built by parse_raw_whois(); must contain "contacts".
    normalized -- True to normalize every supported key, or a collection naming
        the keys to normalize.

    Country, city (airport code) and state lookups, the organization extraction
    and the final stripping of contact values are applied to every contact
    regardless of *normalized*. Returns the same *data* object.
    """
    def wanted(key):
        return normalized == True or key in normalized

    for key in ("nameservers", "emails", "whois_server"):
        if key in data and data[key] is not None and wanted(key):
            if is_string(data[key]):
                data[key] = data[key].lower()
            else:
                data[key] = [item.lower() for item in data[key]]

    for key, threshold in (("registrar", 4), ("status", 3)):
        ignore_nic = (key == "registrar")
        if key in data and data[key] is not None and wanted(key):
            if is_string(data[key]):
                data[key] = normalize_name(data[key], abbreviation_threshold=threshold, length_threshold=1, ignore_nic=ignore_nic)
            else:
                data[key] = [normalize_name(item, abbreviation_threshold=threshold, length_threshold=1, ignore_nic=ignore_nic) for item in data[key]]

    for contact_type, contact in data['contacts'].items():
        if contact is None:
            continue

        if 'country' in contact and contact['country'] in countries:
            contact['country'] = countries[contact['country']]
        if 'city' in contact and contact['city'] in airports:
            contact['city'] = airports[contact['city']]
        if 'country' in contact and 'state' in contact:
            for country, source in (("united states", states_us), ("australia", states_au), ("canada", states_ca)):
                if country in contact["country"].lower() and contact["state"] in source:
                    contact["state"] = source[contact["state"]]

        for key in ("email",):
            if key in contact and contact[key] is not None and wanted(key):
                if is_string(contact[key]):
                    contact[key] = contact[key].lower()
                else:
                    contact[key] = [item.lower() for item in contact[key]]

        for key in ("name", "street"):
            if key in contact and contact[key] is not None and wanted(key):
                contact[key] = normalize_name(contact[key], abbreviation_threshold=3)

        for key in ("city", "organization", "state", "country"):
            if key in contact and contact[key] is not None and wanted(key):
                contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3)

        if "name" in contact and "organization" not in contact:
            # Move name lines that look like a company into "organization". The lines
            # are split into two new lists; deleting from the list being iterated
            # would skip the line following every match.
            person_lines = []
            organization_lines = []
            for line in [x.strip() for x in contact["name"].splitlines()]:
                if any(re.search(regex, line) for regex in organization_regexes):
                    organization_lines.append(line)
                else:
                    person_lines.append(line)

            if len(person_lines) > 0:
                contact["name"] = "\n".join(person_lines)
            else:
                del contact["name"]

            if len(organization_lines) > 0:
                contact["organization"] = "\n".join(organization_lines)

        if "street" in contact and "organization" not in contact:
            lines = [x.strip() for x in contact["street"].splitlines()]
            if len(lines) > 1:
                for regex in organization_regexes:
                    if re.search(regex, lines[0]):
                        contact["organization"] = lines[0]
                        contact["street"] = "\n".join(lines[1:])
                        break

        for key in list(contact.keys()):
            try:
                contact[key] = contact[key].strip(", ")
                if contact[key] == "-" or contact[key].lower() == "n/a":
                    del contact[key]
            except AttributeError as e:
                pass # Not a string

    return data


def _normalize_word(word, abbreviation_threshold, lowercase_domains):
    """Capitalize a long plain word, lowercase a domain-like word, else leave it alone."""
    if len(word) >= abbreviation_threshold and "." not in word:
        return word.capitalize()
    if lowercase_domains and "." in word and not word.endswith(".") and not word.startswith("."):
        return word.lower()
    # Probably an abbreviation or domain, leave it alone
    return word


def normalize_name(value, abbreviation_threshold=4, length_threshold=8, lowercase_domains=True, ignore_nic=False):
    """Fix the capitalization of lines that are entirely upper- or lowercase.

    value -- string, possibly spanning several lines.
    abbreviation_threshold -- words shorter than this are left untouched.
    length_threshold -- lines shorter than this are left untouched.
    lowercase_domains -- lowercase words with a dot that is not at either end.
    ignore_nic -- uppercase the whole line when it contains "nic".

    Returns the lines re-joined with newlines. Runs of whitespace inside a
    rewritten line collapse to single spaces.
    """
    normalized_lines = []
    for line in value.split("\n"):
        line = line.strip(",") # Get rid of useless comma's
        if (line.isupper() or line.islower()) and len(line) >= length_threshold:
            # This line is likely not capitalized properly
            if ignore_nic == True and "nic" in line.lower():
                # This is a registrar name containing 'NIC' - it should probably be all-uppercase.
                line = line.upper()
            else:
                line = " ".join(
                    _normalize_word(word, abbreviation_threshold, lowercase_domains)
                    for word in line.split()
                )
        normalized_lines.append(line)
    return "\n".join(normalized_lines)


def _extract_date_fields(result):
    """Return (year, month, day, hour, minute, second) from a date-format match.

    Raises ValueError when a captured component is not numeric; an unknown month
    name yields month 0.
    """
    # These are always numeric. If they fail, there is no valid date present.
    year = int(result.group("year"))
    day = int(result.group("day"))

    # Detect and correct shorthand year notation
    if year < 60:
        year += 2000
    elif year < 100:
        year += 1900

    # This will require some more guesswork - some WHOIS servers present the name of the month
    try:
        month = int(result.group("month"))
    except ValueError as e:
        # Apparently not a number. Look up the corresponding number.
        try:
            month = grammar['_months'][result.group("month").lower()]
        except KeyError as e:
            # Unknown month name, default to 0
            month = 0

    time_fields = []
    for group_name in ("hour", "minute", "second"):
        try:
            time_fields.append(int(result.group(group_name)))
        except (IndexError, TypeError) as e:
            # The format has no such group, or the group did not take part in the match
            time_fields.append(0)

    return (year, month, day, time_fields[0], time_fields[1], time_fields[2])


def parse_dates(dates):
    """Convert date strings into datetime objects using grammar['_dateformats'].

    Each string is tried against the formats in order and the first format that
    yields usable fields wins. Strings that match nothing, or that do not form a
    real calendar date even with day and month swapped, are skipped.

    Returns a list of datetime.datetime objects, or None if nothing could be parsed.
    """
    parsed_dates = []

    for date in dates:
        # Reset per date, so a string that matches no format can never reuse the
        # values parsed for the previous one.
        fields = None

        for rule in grammar['_dateformats']:
            result = re.match(rule, date)
            if result is None:
                continue
            try:
                fields = _extract_date_fields(result)
                break
            except ValueError as e:
                # Something went horribly wrong, maybe there is no valid date present?
                fields = None
                print(e) # FIXME: This should have proper logging of some sort...?

        if fields is None:
            continue

        year, month, day, hour, minute, second = fields
        if year <= 0:
            continue

        try:
            parsed = datetime.datetime(year, month, day, hour, minute, second)
        except ValueError as e:
            # We might have gotten the day and month the wrong way around, let's try it the other way around
            # If you're not using an ISO-standard date format, you're an evil registrar!
            try:
                parsed = datetime.datetime(year, day, month, hour, minute, second)
            except ValueError as e:
                continue # Not a valid date either way (eg. unknown month name)
        parsed_dates.append(parsed)

    if len(parsed_dates) > 0:
        return parsed_dates
    else:
        return None


def remove_duplicates(data):
    """Return the entries of *data* in order, keeping only the first of each."""
    cleaned_list = []
    for entry in data:
        if entry not in cleaned_list:
            cleaned_list.append(entry)
    return cleaned_list


def remove_suffixes(data):
    """Reduce every entry to its first run of non-whitespace characters.

    Used to get rid of IP suffixes for nameservers. Entries that contain no
    non-whitespace characters at all are dropped.
    """
    cleaned_list = []
    for entry in data:
        token = re.search("([^\s]+)\s*[\s]*", entry)
        if token is not None:
            cleaned_list.append(token.group(1).lstrip())
    return cleaned_list


def _match_contact(regexes, segments):
    """Return the groupdict of the first pattern matching each segment, or None.

    When several segments match, the match from the last such segment is kept.
    """
    contact = None
    for segment in segments:
        for regex in regexes:
            match = re.search(regex, segment)
            if match is not None:
                contact = match.groupdict()
                break
    return contact


def _postprocess_contact(obj):
    """Clean up one contact dictionary in place."""
    for key in list(obj.keys()):
        if obj[key] is None or obj[key].strip() == "": # Just chomp all surrounding whitespace
            del obj[key]
        else:
            obj[key] = obj[key].strip()

    if "phone_ext" in obj:
        if "phone" in obj:
            obj["phone"] += " ext. %s" % obj["phone_ext"]
            del obj["phone_ext"]

    if "street1" in obj:
        street_items = []
        i = 1
        while "street%d" % i in obj:
            street_items.append(obj.pop("street%d" % i))
            i += 1
        obj["street"] = "\n".join(street_items)

    if "organization1" in obj:
        # This is to deal with eg. HKDNR, who allow organization names in multiple languages.
        organization_items = []
        i = 1
        while "organization%d" % i in obj:
            if obj["organization%d" % i].strip() != "":
                organization_items.append(obj["organization%d" % i])
                del obj["organization%d" % i]
            i += 1
        obj["organization"] = "\n".join(organization_items)

    for date_key in ('changedate', 'creationdate'):
        if date_key in obj:
            parsed = parse_dates([obj[date_key]])
            if parsed:
                obj[date_key] = parsed[0]
            else:
                # parse_dates() found no usable date; drop the field instead of
                # failing on the None it returns.
                del obj[date_key]

    if 'street' in obj and "\n" in obj["street"] and 'postalcode' not in obj:
        # Deal with certain mad WHOIS servers that don't properly delimit address data... (yes, AFNIC, looking at you)
        lines = [x.strip() for x in obj["street"].splitlines()]
        if " " in lines[-1]:
            postal_code, city = lines[-1].split(" ", 1)
            if "." not in lines[-1] and re.match("[0-9]", postal_code) and len(postal_code) >= 3:
                obj["postalcode"] = postal_code
                obj["city"] = city
                obj["street"] = "\n".join(lines[:-1])

    if 'firstname' in obj or 'lastname' in obj:
        elements = []
        if 'firstname' in obj:
            elements.append(obj["firstname"])
        if 'lastname' in obj:
            elements.append(obj["lastname"])
        obj["name"] = " ".join(elements)

    if 'country' in obj and 'city' in obj and 'street' in obj and (re.match("^R\.?O\.?C\.?$", obj["country"], re.IGNORECASE) or obj["country"].lower() == "republic of china") and obj["city"].lower() == "taiwan":
        # There's an edge case where some registrants append ", Republic of China" after "Taiwan", and this is mis-parsed
        # as Taiwan being the city. This is meant to correct that. The real city is taken from the last street
        # line, so the correction is only attempted when a street is present.
        obj["country"] = "%s, %s" % (obj["city"], obj["country"])
        lines = [x.strip() for x in obj["street"].splitlines()]
        obj["city"] = lines[-1]
        obj["street"] = "\n".join(lines[:-1])


def parse_registrants(data, never_query_handles=True, handle_server=""):
    """Extract the registrant, tech, admin and billing contacts from WHOIS segments.

    data -- list of raw response strings.
    never_query_handles -- when False, NIC handles that are referenced but not
        defined in *data* are looked up through fetch_nic_contact().
    handle_server -- WHOIS server used for those lookups.

    Returns a dict with the keys "registrant", "tech", "admin" and "billing";
    each value is a contact dict, or None if that contact was not found.
    """
    contacts = {
        "registrant": _match_contact(registrant_regexes, data),
        "tech": _match_contact(tech_contact_regexes, data),
        "admin": _match_contact(admin_contact_regexes, data),
        "billing": _match_contact(billing_contact_regexes, data),
    }

    # Find NIC handle contact definitions
    handle_contacts = parse_nic_contact(data)

    # Find NIC handle references and process them
    for category in nic_contact_references:
        for regex in nic_contact_references[category]:
            for segment in data:
                match = re.search(regex, segment)
                if match is None:
                    continue

                data_reference = match.groupdict()
                if data_reference["handle"] == "-" or re.match("https?:\/\/", data_reference["handle"]) is not None:
                    pass # Reference was either blank or a URL; the latter is to deal with false positives for nic.ru
                else:
                    found = False
                    for contact in handle_contacts:
                        if contact["handle"] == data_reference["handle"]:
                            found = True
                            data_reference.update(contact)
                    if found == False:
                        # The contact definition was not found in the supplied raw WHOIS data. If the
                        # method has been called with never_query_handles=False, we can use the supplied
                        # WHOIS server for looking up the handle information separately.
                        if never_query_handles == False:
                            try:
                                contact = fetch_nic_contact(data_reference["handle"], handle_server)
                                data_reference.update(contact)
                            except shared.WhoisException as e:
                                pass # No data found. TODO: Log error?
                        else:
                            pass # TODO: Log warning?
                    if category in contacts:
                        contacts[category] = data_reference
                # NOTE(review): nesting of this break was reconstructed from a
                # whitespace-collapsed copy; it is assumed to end the segment scan
                # after the first match of this pattern - confirm against upstream.
                break

    # Post-processing
    for obj in contacts.values():
        if obj is not None:
            _postprocess_contact(obj)

    return {
        "registrant": contacts["registrant"],
        "tech": contacts["tech"],
        "admin": contacts["admin"],
        "billing": contacts["billing"],
    }


def fetch_nic_contact(handle, lookup_server):
    """Query *lookup_server* for *handle* and return the first contact parsed from the reply.

    Raises shared.WhoisException when the response contains no recognizable contact.
    """
    response = net.get_whois_raw(handle, lookup_server)
    response = [segment.replace("\r", "") for segment in response] # Carriage returns are the devil
    results = parse_nic_contact(response)

    if len(results) > 0:
        return results[0]
    else:
        raise shared.WhoisException("No contact data found in the response.")


def parse_nic_contact(data):
    """Return the groupdicts of every NIC contact definition found in *data*.

    Results are ordered by pattern first, then by segment.
    """
    handle_contacts = []
    for regex in nic_contact_regexes:
        for segment in data:
            for match in re.finditer(regex, segment):
                handle_contacts.append(match.groupdict())
    return handle_contacts