From 9b6416af3f6957c524627124119f8de84799a7e6 Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Mon, 28 Apr 2014 01:31:23 +0200 Subject: [PATCH] Correctly parse EURid data, fixes #9 --- pythonwhois/parse.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index ecad144..3d75854 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -172,7 +172,7 @@ def parse_raw_whois(raw_data, normalized=[]): data[rule_key] = [val] # Whois.com is a bit special... Fabulous.com also seems to use this format. - match = re.search("Name Servers:([/s/S]+)\n\n", segment) + match = re.search("Name [Ss]ervers:([/s/S]+)\n\n", segment) if match is not None: chunk = match.group(1) for match in re.findall("[ ]+(.+)\n", chunk): @@ -206,19 +206,21 @@ def parse_raw_whois(raw_data, normalized=[]): data["nameservers"].append(match.strip()) except KeyError as e: data["nameservers"] = [match.strip()] - # SIDN isn't very standard either. - match = re.search("Registrar:\n\s+(\S.*)", segment) + # SIDN isn't very standard either. And EURid uses a similar format. + match = re.search("Registrar:\n\s+(?:Name:\s*)?(\S.*)", segment) if match is not None: data["registrar"].insert(0, match.group(1).strip()) - match = re.search("Domain nameservers:([\s\S]*?\n)\n", segment) + match = re.search("(?:Domain nameservers|Name servers):([\s\S]*?\n)\n", segment) if match is not None: chunk = match.group(1) - for match in re.findall(" (.+)\n", chunk): + for match in re.findall("\s+?(.+)\n", chunk): match = match.split()[0] - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] + # Prevent nameserver aliases from being picked up. + if not match.startswith("[") and not match.endswith("]"): + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] # The .ie WHOIS server puts ambiguous status information in an unhelpful order match = re.search('ren-status:\s*(.+)', segment) if match is not None: @@ -424,7 +426,7 @@ def parse_dates(dates): hour = 0 minute = 0 second = 0 - print(e.message) + print(e.message) # FIXME: This should have proper logging of some sort...? try: if year > 0: try: @@ -511,6 +513,7 @@ def parse_registrants(data): "tech-c-name:\s*(?P.+)\ntech-c-type:\s*(?P.+)\ntech-c-address:\s*(?P.+)\ntech-c-postcode:\s*(?P.+)\ntech-c-city:\s*(?P.+)\ntech-c-country:\s*(?P.+)\n(?:tech-c-phone:\s*(?P.+)\n)?(?:tech-c-email:\s*(?P.+)\n)?", # Hetzner "Admin Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication " Technical contact:\n (?P.+)\n (?P.*)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)\n (?P.+)\n (?P.*)\n (?P.*)", # .am + "Technical:\n\s*Name:\s*(?P.*)\n\s*Organisation:\s*(?P.*)\n\s*Language:.*\n\s*Phone:\s*(?P.*)\n\s*Fax:\s*(?P.*)\n\s*Email:\s*(?P.*)\n", # EURid "\[Zone-C\]\nType: (?P.+)\nName: (?P.+)\n(Organisation: (?P.+)\n){0,1}(Address: (?P.+)\n){1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}PostalCode: (?P.+)\nCity: (?P.+)\nCountryCode: (?P[A-Za-z]{2})\nPhone: (?P.+)\nFax: (?P.+)\nEmail: (?P.+)\n(Remarks: (?P.+)\n){0,1}Changed: (?P.+)", # DeNIC ]