Whitespace fixes, and turning the optional wrapper around the 'organization' group in the Hetzner regexes into a non-capturing group (the named 'organization' group itself is unchanged).

develop
Sven Slootweg 10 years ago
parent 29e7ce2b62
commit d07cef1774

@ -2,7 +2,7 @@ from __future__ import print_function
import re, sys, datetime, csv, pkgutil
from . import net, shared
try:
try:
from io import StringIO
except ImportError:
from cStringIO import StringIO
@ -25,13 +25,13 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):
destination[line[abbrev_key]] = line[name_key]
except IOError as e:
pass
airports = {}
countries = {}
states_au = {}
states_us = {}
states_ca = {}
try:
reader = csv.reader(pkgdata("airports.dat").splitlines())
@ -50,7 +50,7 @@ read_dataset("states_ca.dat", states_ca, "abbreviation", "name", is_dict=True)
def precompile_regexes(source, flags=0):
    """Compile every pattern in *source* and return the results as a list.

    :param source: iterable of regular-expression pattern strings.
    :param flags: ``re`` flags passed unchanged to each ``re.compile`` call
        (defaults to 0, i.e. no flags).
    :returns: list of compiled pattern objects, in the same order as *source*.
    """
    return [re.compile(regex, flags) for regex in source]
grammar = {
"_data": {
'id': ['Domain ID:[ ]*(?P<val>.+)'],
@ -157,7 +157,7 @@ grammar = {
'(?<=[ .]{2})(?P<val>[a-z0-9-]+\.d?ns[0-9]*\.([a-z0-9-]+\.)+[a-z0-9]+)',
'(?<=[ .]{2})(?P<val>([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})',
'(?<=[ .]{2})[^a-z0-9.-](?P<val>d?ns\.([a-z0-9-]+\.)+[a-z0-9]+)',
'Nserver:\s*(?P<val>.+)'],
'Nserver:\s*(?P<val>.+)'],
'emails': ['(?P<val>[\w.-]+@[\w.-]+\.[\w]{2,6})', # Really need to fix this, much longer TLDs now exist...
'(?P<val>[\w.-]+\sAT\s[\w.-]+\sDOT\s[\w]{2,6})']
},
@ -235,7 +235,7 @@ registrant_regexes = [
"Domain Owner:\n\t(?P<organization>.+)\n\n[\s\S]*?(?:Registrant Contact:\n\t(?P<name>.+))?\n\nRegistrant(?:'s)? (?:a|A)ddress:(?:\n\t(?P<street1>.+)\n(?:\t(?P<street2>.+)\n)?(?:\t(?P<street3>.+)\n)?\t(?P<city>.+)\n\t(?P<postalcode>.+))?\n\t(?P<country>.+)(?:\n\t(?P<phone>.+) \(Phone\)\n\t(?P<fax>.+) \(FAX\)\n\t(?P<email>.+))?\n\n", # .ac.uk - what a mess...
"Registrant ID: (?P<handle>.+)\nRegistrant: (?P<name>.+)\nRegistrant Contact Email: (?P<email>.+)", # .cn (CNNIC)
"Registrant contact:\n (?P<name>.+)\n (?P<street>.*)\n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+)\n\n", # Fabulous.com
"registrant-name:\s*(?P<name>.+)\n(registrant-organization:\s*(?P<organization>.*)\n)?registrant-type:\s*(?P<type>.+)\nregistrant-address:\s*(?P<street>.+)\nregistrant-postcode:\s*(?P<postalcode>.+)\nregistrant-city:\s*(?P<city>.+)\nregistrant-country:\s*(?P<country>.+)\n(?:registrant-phone:\s*(?P<phone>.+)\n)?(?:registrant-email:\s*(?P<email>.+)\n)?", # Hetzner
"registrant-name:\s*(?P<name>.+)\n(?:registrant-organization:\s*(?P<organization>.*)\n)?registrant-type:\s*(?P<type>.+)\nregistrant-address:\s*(?P<street>.+)\nregistrant-postcode:\s*(?P<postalcode>.+)\nregistrant-city:\s*(?P<city>.+)\nregistrant-country:\s*(?P<country>.+)\n(?:registrant-phone:\s*(?P<phone>.+)\n)?(?:registrant-email:\s*(?P<email>.+)\n)?", # Hetzner
"Registrant Contact Information :[ ]*\n[ ]+(?P<firstname>.*)\n[ ]+(?P<lastname>.*)\n[ ]+(?P<organization>.*)\n[ ]+(?P<email>.*)\n[ ]+(?P<street>.*)\n[ ]+(?P<city>.*)\n[ ]+(?P<postalcode>.*)\n[ ]+(?P<phone>.*)\n[ ]+(?P<fax>.*)\n\n", # GAL Communication
"Contact Information : For Customer # [0-9]+[ ]*\n[ ]+(?P<firstname>.*)\n[ ]+(?P<lastname>.*)\n[ ]+(?P<organization>.*)\n[ ]+(?P<email>.*)\n[ ]+(?P<street>.*)\n[ ]+(?P<city>.*)\n[ ]+(?P<postalcode>.*)\n[ ]+(?P<phone>.*)\n[ ]+(?P<fax>.*)\n\n", # GAL Communication alternative (private WHOIS) format?
"Registrant:\n Name: (?P<name>.+)\n City: (?P<city>.+)\n State: (?P<state>.+)\n Country: (?P<country>.+)\n", # Akky (.com.mx)
@ -271,7 +271,7 @@ tech_contact_regexes = [
"Technical Contacts\n Name: (?P<name>.+)\n(?: Organization: (?P<organization>.+)\n)? ContactID: (?P<handle>.+)\n(?: Address: (?P<street1>.+)\n(?: (?P<street2>.+)\n(?: (?P<street3>.+)\n)?)? (?P<city>.+)\n (?P<postalcode>.+)\n (?P<state>.+)\n (?P<country>.+)\n)?(?: Created: (?P<creationdate>.+)\n)?(?: Last Update: (?P<changedate>.+)\n)?", # nic.it // NOTE: Why does this say 'Contacts'? Can it have multiple?
"Tech Name[.]* (?P<name>.*)\n Tech Address[.]* (?P<street1>.*)\n Tech Address[.]* (?P<street2>.*)\n(?: Tech Address[.]* (?P<street3>.*)\n)? Tech Address[.]* (?P<city>.*)\n Tech Address[.]* (?P<postalcode>.*)\n Tech Address[.]* (?P<state>.*)\n Tech Address[.]* (?P<country>.*)\n Tech Email[.]* (?P<email>.*)\n Tech Phone[.]* (?P<phone>.*)\n Tech Fax[.]* (?P<fax>.*)", # Melbourne IT
"Technical contact:\n(?: (?P<organization>.+)\n)? (?P<name>.+)\n (?P<email>.+)\n (?P<street>.+)\n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+)\n Phone: (?P<phone>.*)\n Fax: (?P<fax>.*)\n", # Fabulous.com
"tech-c-name:\s*(?P<name>.+)\n(tech-c-organization:\s*(?P<organization>.*)\n)?tech-c-type:\s*(?P<type>.+)\ntech-c-address:\s*(?P<street>.+)\ntech-c-postcode:\s*(?P<postalcode>.+)\ntech-c-city:\s*(?P<city>.+)\ntech-c-country:\s*(?P<country>.+)\n(?:tech-c-phone:\s*(?P<phone>.+)\n)?(?:tech-c-email:\s*(?P<email>.+)\n)?", # Hetzner
"tech-c-name:\s*(?P<name>.+)\n(?:tech-c-organization:\s*(?P<organization>.*)\n)?tech-c-type:\s*(?P<type>.+)\ntech-c-address:\s*(?P<street>.+)\ntech-c-postcode:\s*(?P<postalcode>.+)\ntech-c-city:\s*(?P<city>.+)\ntech-c-country:\s*(?P<country>.+)\n(?:tech-c-phone:\s*(?P<phone>.+)\n)?(?:tech-c-email:\s*(?P<email>.+)\n)?", # Hetzner
"Admin Contact Information :[ ]*\n[ ]+(?P<firstname>.*)\n[ ]+(?P<lastname>.*)\n[ ]+(?P<organization>.*)\n[ ]+(?P<email>.*)\n[ ]+(?P<street>.*)\n[ ]+(?P<city>.*)\n[ ]+(?P<postalcode>.*)\n[ ]+(?P<phone>.*)\n[ ]+(?P<fax>.*)\n\n", # GAL Communication
" Technical contact:\n (?P<name>.+)\n (?P<organization>.*)\n (?P<street>.+)\n (?P<city>.+) (?P<state>\S+),[ ]+(?P<postalcode>.+)\n (?P<country>.+)\n (?P<email>.+)\n (?P<phone>.*)\n (?P<fax>.*)", # .am
"Technical:\n\s*Name:\s*(?P<name>.*)\n\s*Organisation:\s*(?P<organization>.*)\n\s*Language:.*\n\s*Phone:\s*(?P<phone>.*)\n\s*Fax:\s*(?P<fax>.*)\n\s*Email:\s*(?P<email>.*)\n", # EURid
@ -304,7 +304,7 @@ admin_contact_regexes = [
"Admin Contact\n Name: (?P<name>.+)\n(?: Organization: (?P<organization>.+)\n)? ContactID: (?P<handle>.+)\n(?: Address: (?P<street1>.+)\n(?: (?P<street2>.+)\n(?: (?P<street3>.+)\n)?)? (?P<city>.+)\n (?P<postalcode>.+)\n (?P<state>.+)\n (?P<country>.+)\n)?(?: Created: (?P<creationdate>.+)\n)?(?: Last Update: (?P<changedate>.+)\n)?", # nic.it
"Admin Name[.]* (?P<name>.*)\n Admin Address[.]* (?P<street1>.*)\n Admin Address[.]* (?P<street2>.*)\n(?: Admin Address[.]* (?P<street3>.*)\n)? Admin Address[.]* (?P<city>.*)\n Admin Address[.]* (?P<postalcode>.*)\n Admin Address[.]* (?P<state>.*)\n Admin Address[.]* (?P<country>.*)\n Admin Email[.]* (?P<email>.*)\n Admin Phone[.]* (?P<phone>.*)\n Admin Fax[.]* (?P<fax>.*)", # Melbourne IT
"Administrative contact:\n(?: (?P<organization>.+)\n)? (?P<name>.+)\n (?P<email>.+)\n (?P<street>.+)\n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+)\n Phone: (?P<phone>.*)\n Fax: (?P<fax>.*)\n", # Fabulous.com
"admin-c-name:\s*(?P<name>.+)\n(admin-c-organization:\s*(?P<organization>.*)\n)?admin-c-type:\s*(?P<type>.+)\nadmin-c-address:\s*(?P<street>.+)\nadmin-c-postcode:\s*(?P<postalcode>.+)\nadmin-c-city:\s*(?P<city>.+)\nadmin-c-country:\s*(?P<country>.+)\n(?:admin-c-phone:\s*(?P<phone>.+)\n)?(?:admin-c-email:\s*(?P<email>.+)\n)?", # Hetzner
"admin-c-name:\s*(?P<name>.+)\n(?:admin-c-organization:\s*(?P<organization>.*)\n)?admin-c-type:\s*(?P<type>.+)\nadmin-c-address:\s*(?P<street>.+)\nadmin-c-postcode:\s*(?P<postalcode>.+)\nadmin-c-city:\s*(?P<city>.+)\nadmin-c-country:\s*(?P<country>.+)\n(?:admin-c-phone:\s*(?P<phone>.+)\n)?(?:admin-c-email:\s*(?P<email>.+)\n)?", # Hetzner
"Tech Contact Information :[ ]*\n[ ]+(?P<firstname>.*)\n[ ]+(?P<lastname>.*)\n[ ]+(?P<organization>.*)\n[ ]+(?P<email>.*)\n[ ]+(?P<street>.*)\n[ ]+(?P<city>.*)\n[ ]+(?P<postalcode>.*)\n[ ]+(?P<phone>.*)\n[ ]+(?P<fax>.*)\n\n", # GAL Communication
" Administrative contact:\n (?P<name>.+)\n (?P<organization>.*)\n (?P<street>.+)\n (?P<city>.+) (?P<state>\S+),[ ]+(?P<postalcode>.+)\n (?P<country>.+)\n (?P<email>.+)\n (?P<phone>.*)\n (?P<fax>.*)", # .am
"Administrative Contact:\n Name: (?P<name>.+)\n City: (?P<city>.+)\n State: (?P<state>.+)\n Country: (?P<country>.+)\n", # Akky (.com.mx)
@ -553,7 +553,7 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_
data["nameservers"].append(match.strip())
except KeyError as e:
data["nameservers"] = [match.strip()]
data["contacts"] = parse_registrants(raw_data, never_query_handles, handle_server)
@ -645,7 +645,7 @@ def normalize_data(data, normalized):
for country, source in (("united states", states_us), ("australia", states_au), ("canada", states_ca)):
if country in contact["country"].lower() and contact["state"] in source:
contact["state"] = source[contact["state"]]
for key in ("email",):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
if is_string(contact[key]):
@ -660,7 +660,7 @@ def normalize_data(data, normalized):
for key in ("city", "organization", "state", "country"):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3)
if "name" in contact and "organization" not in contact:
lines = [x.strip() for x in contact["name"].splitlines()]
new_lines = []
@ -674,10 +674,10 @@ def normalize_data(data, normalized):
contact["name"] = "\n".join(lines)
else:
del contact["name"]
if len(new_lines) > 0:
contact["organization"] = "\n".join(new_lines)
if "street" in contact and "organization" not in contact:
lines = [x.strip() for x in contact["street"].splitlines()]
if len(lines) > 1:
@ -686,7 +686,7 @@ def normalize_data(data, normalized):
contact["organization"] = lines[0]
contact["street"] = "\n".join(lines[1:])
break
for key in list(contact.keys()):
try:
contact[key] = contact[key].strip(", ")
@ -831,10 +831,10 @@ def remove_suffixes(data):
# Removes everything before and after the first non-whitespace continuous string.
# Used to get rid of IP suffixes for nameservers.
cleaned_list = []
for entry in data:
cleaned_list.append(re.search("([^\s]+)\s*[\s]*", entry).group(1).lstrip())
return cleaned_list
def parse_registrants(data, never_query_handles=True, handle_server=""):
@ -911,7 +911,7 @@ def parse_registrants(data, never_query_handles=True, handle_server=""):
elif category == "admin":
admin_contact = data_reference
break
# Post-processing
for obj in (registrant, tech_contact, billing_contact, admin_contact):
if obj is not None:
@ -986,12 +986,12 @@ def fetch_nic_contact(handle, lookup_server):
response = net.get_whois_raw(handle, lookup_server)
response = [segment.replace("\r", "") for segment in response] # Carriage returns are the devil
results = parse_nic_contact(response)
if len(results) > 0:
return results[0]
else:
raise shared.WhoisException("No contact data found in the response.")
def parse_nic_contact(data):
handle_contacts = []
for regex in nic_contact_regexes:
@ -999,5 +999,5 @@ def parse_nic_contact(data):
matches = re.finditer(regex, segment)
for match in matches:
handle_contacts.append(match.groupdict())
return handle_contacts

Loading…
Cancel
Save