@ -2,7 +2,7 @@ from __future__ import print_function
import re , sys , datetime , csv , pkgutil
from . import net , shared
try :
try :
from io import StringIO
except ImportError :
from cStringIO import StringIO
@ -25,13 +25,13 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):
destination [ line [ abbrev_key ] ] = line [ name_key ]
except IOError as e :
pass
# Module-level lookup tables, all created empty here.
# NOTE(review): presumably these are filled from the bundled *.dat package
# data shortly after this point - confirm against the loading code.
airports = { }
countries = { }
# State/province tables for Australia, the United States and Canada.
# normalize_data() looks contact["state"] up in these and, on a hit,
# replaces it with the mapped value.
states_au = { }
states_us = { }
states_ca = { }
try :
reader = csv . reader ( pkgdata ( " airports.dat " ) . splitlines ( ) )
@ -50,7 +50,7 @@ read_dataset("states_ca.dat", states_ca, "abbreviation", "name", is_dict=True)
def precompile_regexes(source, flags=0):
    """Compile every pattern in *source* and return the results as a list.

    source -- iterable of regular-expression pattern strings
    flags  -- ``re`` flags applied to each pattern (default 0, i.e. none)

    The returned list keeps the order of *source*.
    """
    compiled = []
    for pattern in source:
        compiled.append(re.compile(pattern, flags))
    return compiled
grammar = {
" _data " : {
' id ' : [ ' Domain ID:[ ]*(?P<val>.+) ' ] ,
@ -157,7 +157,7 @@ grammar = {
' (?<=[ .] {2} )(?P<val>[a-z0-9-]+ \ .d?ns[0-9]* \ .([a-z0-9-]+ \ .)+[a-z0-9]+) ' ,
' (?<=[ .] {2} )(?P<val>([a-z0-9-]+ \ .)+[a-z0-9]+)( \ s+([0-9] { 1,3} \ .) {3} [0-9] { 1,3}) ' ,
' (?<=[ .] {2} )[^a-z0-9.-](?P<val>d?ns \ .([a-z0-9-]+ \ .)+[a-z0-9]+) ' ,
' Nserver: \ s*(?P<val>.+) ' ] ,
' Nserver: \ s*(?P<val>.+) ' ] ,
' emails ' : [ ' (?P<val>[ \ w.-]+@[ \ w.-]+ \ .[ \ w] { 2,6}) ' , # Really need to fix this, much longer TLDs now exist...
' (?P<val>[ \ w.-]+ \ sAT \ s[ \ w.-]+ \ sDOT \ s[ \ w] { 2,6}) ' ]
} ,
@ -235,7 +235,7 @@ registrant_regexes = [
" Domain Owner: \n \t (?P<organization>.+) \n \n [ \ s \ S]*?(?:Registrant Contact: \n \t (?P<name>.+))? \n \n Registrant(?: ' s)? (?:a|A)ddress:(?: \n \t (?P<street1>.+) \n (?: \t (?P<street2>.+) \n )?(?: \t (?P<street3>.+) \n )? \t (?P<city>.+) \n \t (?P<postalcode>.+))? \n \t (?P<country>.+)(?: \n \t (?P<phone>.+) \ (Phone \ ) \n \t (?P<fax>.+) \ (FAX \ ) \n \t (?P<email>.+))? \n \n " , # .ac.uk - what a mess...
" Registrant ID: (?P<handle>.+) \n Registrant: (?P<name>.+) \n Registrant Contact Email: (?P<email>.+) " , # .cn (CNNIC)
" Registrant contact: \n (?P<name>.+) \n (?P<street>.*) \n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+) \n \n " , # Fabulous.com
" registrant-name: \ s*(?P<name>.+) \n ( registrant-organization:\ s*(?P<organization>.*) \n )?registrant-type: \ s*(?P<type>.+) \n registrant-address: \ s*(?P<street>.+) \n registrant-postcode: \ s*(?P<postalcode>.+) \n registrant-city: \ s*(?P<city>.+) \n registrant-country: \ s*(?P<country>.+) \n (?:registrant-phone: \ s*(?P<phone>.+) \n )?(?:registrant-email: \ s*(?P<email>.+) \n )? " , # Hetzner
" registrant-name: \ s*(?P<name>.+) \n ( ?: registrant-organization:\ s*(?P<organization>.*) \n )?registrant-type: \ s*(?P<type>.+) \n registrant-address: \ s*(?P<street>.+) \n registrant-postcode: \ s*(?P<postalcode>.+) \n registrant-city: \ s*(?P<city>.+) \n registrant-country: \ s*(?P<country>.+) \n (?:registrant-phone: \ s*(?P<phone>.+) \n )?(?:registrant-email: \ s*(?P<email>.+) \n )? " , # Hetzner
" Registrant Contact Information :[ ]* \n [ ]+(?P<firstname>.*) \n [ ]+(?P<lastname>.*) \n [ ]+(?P<organization>.*) \n [ ]+(?P<email>.*) \n [ ]+(?P<street>.*) \n [ ]+(?P<city>.*) \n [ ]+(?P<postalcode>.*) \n [ ]+(?P<phone>.*) \n [ ]+(?P<fax>.*) \n \n " , # GAL Communication
" Contact Information : For Customer # [0-9]+[ ]* \n [ ]+(?P<firstname>.*) \n [ ]+(?P<lastname>.*) \n [ ]+(?P<organization>.*) \n [ ]+(?P<email>.*) \n [ ]+(?P<street>.*) \n [ ]+(?P<city>.*) \n [ ]+(?P<postalcode>.*) \n [ ]+(?P<phone>.*) \n [ ]+(?P<fax>.*) \n \n " , # GAL Communication alternative (private WHOIS) format?
" Registrant: \n Name: (?P<name>.+) \n City: (?P<city>.+) \n State: (?P<state>.+) \n Country: (?P<country>.+) \n " , # Akky (.com.mx)
@ -271,7 +271,7 @@ tech_contact_regexes = [
" Technical Contacts \n Name: (?P<name>.+) \n (?: Organization: (?P<organization>.+) \n )? ContactID: (?P<handle>.+) \n (?: Address: (?P<street1>.+) \n (?: (?P<street2>.+) \n (?: (?P<street3>.+) \n )?)? (?P<city>.+) \n (?P<postalcode>.+) \n (?P<state>.+) \n (?P<country>.+) \n )?(?: Created: (?P<creationdate>.+) \n )?(?: Last Update: (?P<changedate>.+) \n )? " , # nic.it // NOTE: Why does this say 'Contacts'? Can it have multiple?
" Tech Name[.]* (?P<name>.*) \n Tech Address[.]* (?P<street1>.*) \n Tech Address[.]* (?P<street2>.*) \n (?: Tech Address[.]* (?P<street3>.*) \n )? Tech Address[.]* (?P<city>.*) \n Tech Address[.]* (?P<postalcode>.*) \n Tech Address[.]* (?P<state>.*) \n Tech Address[.]* (?P<country>.*) \n Tech Email[.]* (?P<email>.*) \n Tech Phone[.]* (?P<phone>.*) \n Tech Fax[.]* (?P<fax>.*) " , # Melbourne IT
" Technical contact: \n (?: (?P<organization>.+) \n )? (?P<name>.+) \n (?P<email>.+) \n (?P<street>.+) \n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+) \n Phone: (?P<phone>.*) \n Fax: (?P<fax>.*) \n " , # Fabulous.com
" tech-c-name: \ s*(?P<name>.+) \n ( tech-c-organization:\ s*(?P<organization>.*) \n )?tech-c-type: \ s*(?P<type>.+) \n tech-c-address: \ s*(?P<street>.+) \n tech-c-postcode: \ s*(?P<postalcode>.+) \n tech-c-city: \ s*(?P<city>.+) \n tech-c-country: \ s*(?P<country>.+) \n (?:tech-c-phone: \ s*(?P<phone>.+) \n )?(?:tech-c-email: \ s*(?P<email>.+) \n )? " , # Hetzner
" tech-c-name: \ s*(?P<name>.+) \n ( ?: tech-c-organization:\ s*(?P<organization>.*) \n )?tech-c-type: \ s*(?P<type>.+) \n tech-c-address: \ s*(?P<street>.+) \n tech-c-postcode: \ s*(?P<postalcode>.+) \n tech-c-city: \ s*(?P<city>.+) \n tech-c-country: \ s*(?P<country>.+) \n (?:tech-c-phone: \ s*(?P<phone>.+) \n )?(?:tech-c-email: \ s*(?P<email>.+) \n )? " , # Hetzner
" Admin Contact Information :[ ]* \n [ ]+(?P<firstname>.*) \n [ ]+(?P<lastname>.*) \n [ ]+(?P<organization>.*) \n [ ]+(?P<email>.*) \n [ ]+(?P<street>.*) \n [ ]+(?P<city>.*) \n [ ]+(?P<postalcode>.*) \n [ ]+(?P<phone>.*) \n [ ]+(?P<fax>.*) \n \n " , # GAL Communication
" Technical contact: \n (?P<name>.+) \n (?P<organization>.*) \n (?P<street>.+) \n (?P<city>.+) (?P<state> \ S+),[ ]+(?P<postalcode>.+) \n (?P<country>.+) \n (?P<email>.+) \n (?P<phone>.*) \n (?P<fax>.*) " , # .am
" Technical: \n \ s*Name: \ s*(?P<name>.*) \n \ s*Organisation: \ s*(?P<organization>.*) \n \ s*Language:.* \n \ s*Phone: \ s*(?P<phone>.*) \n \ s*Fax: \ s*(?P<fax>.*) \n \ s*Email: \ s*(?P<email>.*) \n " , # EURid
@ -304,7 +304,7 @@ admin_contact_regexes = [
" Admin Contact \n Name: (?P<name>.+) \n (?: Organization: (?P<organization>.+) \n )? ContactID: (?P<handle>.+) \n (?: Address: (?P<street1>.+) \n (?: (?P<street2>.+) \n (?: (?P<street3>.+) \n )?)? (?P<city>.+) \n (?P<postalcode>.+) \n (?P<state>.+) \n (?P<country>.+) \n )?(?: Created: (?P<creationdate>.+) \n )?(?: Last Update: (?P<changedate>.+) \n )? " , # nic.it
" Admin Name[.]* (?P<name>.*) \n Admin Address[.]* (?P<street1>.*) \n Admin Address[.]* (?P<street2>.*) \n (?: Admin Address[.]* (?P<street3>.*) \n )? Admin Address[.]* (?P<city>.*) \n Admin Address[.]* (?P<postalcode>.*) \n Admin Address[.]* (?P<state>.*) \n Admin Address[.]* (?P<country>.*) \n Admin Email[.]* (?P<email>.*) \n Admin Phone[.]* (?P<phone>.*) \n Admin Fax[.]* (?P<fax>.*) " , # Melbourne IT
" Administrative contact: \n (?: (?P<organization>.+) \n )? (?P<name>.+) \n (?P<email>.+) \n (?P<street>.+) \n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+) \n Phone: (?P<phone>.*) \n Fax: (?P<fax>.*) \n " , # Fabulous.com
" admin-c-name: \ s*(?P<name>.+) \n ( admin-c-organization:\ s*(?P<organization>.*) \n )?admin-c-type: \ s*(?P<type>.+) \n admin-c-address: \ s*(?P<street>.+) \n admin-c-postcode: \ s*(?P<postalcode>.+) \n admin-c-city: \ s*(?P<city>.+) \n admin-c-country: \ s*(?P<country>.+) \n (?:admin-c-phone: \ s*(?P<phone>.+) \n )?(?:admin-c-email: \ s*(?P<email>.+) \n )? " , # Hetzner
" admin-c-name: \ s*(?P<name>.+) \n ( ?: admin-c-organization:\ s*(?P<organization>.*) \n )?admin-c-type: \ s*(?P<type>.+) \n admin-c-address: \ s*(?P<street>.+) \n admin-c-postcode: \ s*(?P<postalcode>.+) \n admin-c-city: \ s*(?P<city>.+) \n admin-c-country: \ s*(?P<country>.+) \n (?:admin-c-phone: \ s*(?P<phone>.+) \n )?(?:admin-c-email: \ s*(?P<email>.+) \n )? " , # Hetzner
" Tech Contact Information :[ ]* \n [ ]+(?P<firstname>.*) \n [ ]+(?P<lastname>.*) \n [ ]+(?P<organization>.*) \n [ ]+(?P<email>.*) \n [ ]+(?P<street>.*) \n [ ]+(?P<city>.*) \n [ ]+(?P<postalcode>.*) \n [ ]+(?P<phone>.*) \n [ ]+(?P<fax>.*) \n \n " , # GAL Communication
" Administrative contact: \n (?P<name>.+) \n (?P<organization>.*) \n (?P<street>.+) \n (?P<city>.+) (?P<state> \ S+),[ ]+(?P<postalcode>.+) \n (?P<country>.+) \n (?P<email>.+) \n (?P<phone>.*) \n (?P<fax>.*) " , # .am
" Administrative Contact: \n Name: (?P<name>.+) \n City: (?P<city>.+) \n State: (?P<state>.+) \n Country: (?P<country>.+) \n " , # Akky (.com.mx)
@ -553,7 +553,7 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_
data [ " nameservers " ] . append ( match . strip ( ) )
except KeyError as e :
data [ " nameservers " ] = [ match . strip ( ) ]
data [ " contacts " ] = parse_registrants ( raw_data , never_query_handles , handle_server )
@ -645,7 +645,7 @@ def normalize_data(data, normalized):
for country , source in ( ( " united states " , states_us ) , ( " australia " , states_au ) , ( " canada " , states_ca ) ) :
if country in contact [ " country " ] . lower ( ) and contact [ " state " ] in source :
contact [ " state " ] = source [ contact [ " state " ] ]
for key in ( " email " , ) :
if key in contact and contact [ key ] is not None and ( normalized == True or key in normalized ) :
if is_string ( contact [ key ] ) :
@ -660,7 +660,7 @@ def normalize_data(data, normalized):
for key in ( " city " , " organization " , " state " , " country " ) :
if key in contact and contact [ key ] is not None and ( normalized == True or key in normalized ) :
contact [ key ] = normalize_name ( contact [ key ] , abbreviation_threshold = 3 , length_threshold = 3 )
if " name " in contact and " organization " not in contact :
lines = [ x . strip ( ) for x in contact [ " name " ] . splitlines ( ) ]
new_lines = [ ]
@ -674,10 +674,10 @@ def normalize_data(data, normalized):
contact [ " name " ] = " \n " . join ( lines )
else :
del contact [ " name " ]
if len ( new_lines ) > 0 :
contact [ " organization " ] = " \n " . join ( new_lines )
if " street " in contact and " organization " not in contact :
lines = [ x . strip ( ) for x in contact [ " street " ] . splitlines ( ) ]
if len ( lines ) > 1 :
@ -686,7 +686,7 @@ def normalize_data(data, normalized):
contact [ " organization " ] = lines [ 0 ]
contact [ " street " ] = " \n " . join ( lines [ 1 : ] )
break
for key in list ( contact . keys ( ) ) :
try :
contact [ key ] = contact [ key ] . strip ( " , " )
@ -831,10 +831,10 @@ def remove_suffixes(data):
# Removes everything before and after the first non-whitespace continuous string.
# Used to get rid of IP suffixes for nameservers.
cleaned_list = [ ]
for entry in data :
cleaned_list . append ( re . search ( " ([^ \ s]+) \ s*[ \ s]* " , entry ) . group ( 1 ) . lstrip ( ) )
return cleaned_list
def parse_registrants ( data , never_query_handles = True , handle_server = " " ) :
@ -911,7 +911,7 @@ def parse_registrants(data, never_query_handles=True, handle_server=""):
elif category == " admin " :
admin_contact = data_reference
break
# Post-processing
for obj in ( registrant , tech_contact , billing_contact , admin_contact ) :
if obj is not None :
@ -986,12 +986,12 @@ def fetch_nic_contact(handle, lookup_server):
response = net . get_whois_raw ( handle , lookup_server )
response = [ segment . replace ( " \r " , " " ) for segment in response ] # Carriage returns are the devil
results = parse_nic_contact ( response )
if len ( results ) > 0 :
return results [ 0 ]
else :
raise shared . WhoisException ( " No contact data found in the response. " )
def parse_nic_contact ( data ) :
handle_contacts = [ ]
for regex in nic_contact_regexes :
@ -999,5 +999,5 @@ def parse_nic_contact(data):
matches = re . finditer ( regex , segment )
for match in matches :
handle_contacts . append ( match . groupdict ( ) )
return handle_contacts