From a4f29a379e185c1c7e5245aad850e6eb086a53e7 Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Fri, 27 Jun 2014 18:29:01 +0200 Subject: [PATCH] Implement HKDNR (.hk) parsing. Fixes #24. --- pythonwhois/parse.py | 26 ++++++++ test/data/unwire.hk | 110 +++++++++++++++++++++++++++++++ test/target_default/unwire.hk | 1 + test/target_normalized/unwire.hk | 1 + 4 files changed, 138 insertions(+) create mode 100644 test/data/unwire.hk create mode 100644 test/target_default/unwire.hk create mode 100644 test/target_normalized/unwire.hk diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 8a894a5..caf1951 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -31,6 +31,7 @@ grammar = { 'created:\s*(?P.+)', '\[Registered Date\]\s*(?P.+)', 'created-date:\s*(?P.+)', + 'Domain Name Commencement Date: (?P.+)', 'registered:\s*(?P.+)', 'registration:\s*(?P.+)'], 'expiration_date': ['\[Expires on\]\s*(?P.+)', @@ -261,6 +262,16 @@ def parse_raw_whois(raw_data, normalized=[], never_query_handles=True, handle_se match = re.search('Registrar\n Organization: (.+)\n', segment) if match is not None: data["registrar"] = [match.group(1).strip()] + # HKDNR (.hk) provides a weird nameserver format with too much whitespace + match = re.search("Name Servers Information:\n\n([\s\S]*?\n)\n", segment) + if match is not None: + chunk = match.group(1) + for match in re.findall("(.+)\n", chunk): + match = match.split()[0] + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] data["contacts"] = parse_registrants(raw_data, never_query_handles, handle_server) @@ -550,6 +561,7 @@ def parse_registrants(data, never_query_handles=True, handle_server=""): "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 2 "Domain Holder: (?P.+)\n(?P.+)\n(?:(?P.+)\n)?(?:(?P.+)\n)?.+?, (?P.+)\n(?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 3 "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+),? (?P[A-Z]{2,3})(?: [A-Z0-9]+)?\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 4 + "Registrant Contact Information:\n\nCompany English Name \(It should be the same as the registered/corporation name on your Business Register Certificate or relevant documents\):(?P.+)\nCompany Chinese name:(?P.+)\nAddress: (?P.+)\nCountry: (?P.+)\nEmail: (?P.+)\n", # HKDNR (.hk) "owner:\s+(?P.+)", # .br "person:\s+(?P.+)", # nic.ru (person) "org:\s+(?P.+)", # nic.ru (organization) @@ -585,6 +597,7 @@ def parse_registrants(data, never_query_handles=True, handle_server=""): "Tech Contact: (?P.+)\n(?P.+) (?P[^\s]+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 4 "Tech Contact: (?P.+)\n(?P.+)\n(?P.+)\n(?P.+) (?P[^\s]+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 5 "Tech Contact: (?P.+)\n(?P.+)\n(?P.+)\n(?P.+)\n(?:(?P.+)\n)?(?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 6 + "Technical Contact Information:\n\n(?:Given name: (?P.+)\n)?(?:Family name: (?P.+)\n)?(?:Company name: (?P.+)\n)?Address: (?P.+)\nCountry: (?P.+)\nPhone: (?P.*)\nFax: (?P.*)\nEmail: (?P.+)\n(?:Account Name: (?P.+)\n)?", # HKDNR (.hk) ] admin_contact_regexes = [ @@ -608,6 +621,7 @@ def parse_registrants(data, never_query_handles=True, handle_server=""): " Administrative contact:\n (?P.+)\n (?P.*)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)\n (?P.+)\n (?P.*)\n (?P.*)", # .am "Administrative Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) "\[Tech-C\]\nType: (?P.+)\nName: (?P.+)\n(Organisation: (?P.+)\n){0,1}(Address: (?P.+)\n){1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}PostalCode: (?P.+)\nCity: (?P.+)\nCountryCode: (?P[A-Za-z]{2})\nPhone: (?P.+)\nFax: (?P.+)\nEmail: (?P.+)\n(Remarks: (?P.+)\n){0,1}Changed: (?P.+)", # DeNIC + "Administrative Contact Information:\n\n(?:Given name: (?P.+)\n)?(?:Family name: (?P.+)\n)?(?:Company name: (?P.+)\n)?Address: (?P.+)\nCountry: (?P.+)\nPhone: (?P.*)\nFax: (?P.*)\nEmail: (?P.+)\n(?:Account Name: (?P.+)\n)?", # HKDNR (.hk) ] billing_contact_regexes = [ @@ -752,6 +766,18 @@ def parse_registrants(data, never_query_handles=True, handle_server=""): break i += 1 obj["street"] = "\n".join(street_items) + if "organization1" in obj: # This is to deal with eg. HKDNR, who allow organization names in multiple languages. + organization_items = [] + i = 1 + while True: + try: + if obj["organization%d" % i].strip() != "": + organization_items.append(obj["organization%d" % i]) + del obj["organization%d" % i] + except KeyError as e: + break + i += 1 + obj["organization"] = "\n".join(organization_items) if 'changedate' in obj: obj['changedate'] = parse_dates([obj['changedate']])[0] if 'creationdate' in obj: diff --git a/test/data/unwire.hk b/test/data/unwire.hk new file mode 100644 index 0000000..f88aaae --- /dev/null +++ b/test/data/unwire.hk @@ -0,0 +1,110 @@ + + ------------------------------------------------------------------------------- + Whois server by HKIRC + ------------------------------------------------------------------------------- + .hk top level Domain names can be registered via HKIRC-Accredited Registrars. + Go to https://www.hkirc.hk/content.jsp?id=280 for details. + ------------------------------------------------------------------------------- + + + +Domain Name: UNWIRE.HK + +Domain Status: Active + +Contract Version: HKDNR latest version + +Registrar Name: Hong Kong Domain Name Registration Company Limited + +Registrar Contact Information: Email: enquiry@hkdnr.hk Hotline: +852 2319 1313 + +Reseller: + + + + +Registrant Contact Information: + +Company English Name (It should be the same as the registered/corporation name on your Business Register Certificate or relevant documents): UNWIRE LIMITED +Company Chinese name: +Address: HK +Country: HK +Email: mkt@bmedia.hk +Domain Name Commencement Date: 24-06-2009 +Expiry Date: 24-06-2021 +Re-registration Status: Complete + + + +Administrative Contact Information: + +Given name: AMAZING +Family name: SHREK +Company name: UNWIRE LIMITED +Address: HK +Country: HK +Phone: +852-123456 +Fax: +Email: mkt@bmedia.hk +Account Name: HK3507379T + + + + +Technical Contact Information: + +Family name: CHEUNG +Company name: UNWIRE LIMITED +Address: HK +Country: HK +Phone: +852-123456 +Fax: +Email: mkt@bmedia.hk + + + + +Name Servers Information: + +NORM.NS.CLOUDFLARE.COM +ZOE.NS.CLOUDFLARE.COM + + + +Status Information: + +Domain Prohibit Status: + + + + ------------------------------------------------------------------------------- + The Registry contains ONLY .com.hk, .net.hk, .edu.hk, .org.hk, + .gov.hk, idv.hk. and .hk $domains. + ------------------------------------------------------------------------------- + +WHOIS Terms of Use +By using this WHOIS search enquiry service you agree to these terms of use. +The data in HKDNR's WHOIS search engine is for information purposes only and HKDNR does not guarantee the accuracy of the data. The data is provided to assist people to obtain information about the registration record of domain names registered by HKDNR. You agree to use the data for lawful purposes only. + +You are not authorised to use high-volume, electronic or automated processes to access, query or harvest data from this WHOIS search enquiry service. + +You agree that you will not and will not allow anyone else to: + +a. use the data for mass unsolicited commercial advertising of any sort via any medium including telephone, email or fax; or + +b. enable high volume, automated or electronic processes that apply to HKDNR or its computer systems including the WHOIS search enquiry service; or + +c. without the prior written consent of HKDNR compile, repackage, disseminate, disclose to any third party or use the data for a purpose other than obtaining information about a domain name registration record; or + +d. use such data to derive an economic benefit for yourself. + +HKDNR in its sole discretion may terminate your access to the WHOIS search enquiry service (including, without limitation, blocking your IP address) at any time including, without limitation, for excessive use of the WHOIS search enquiry service. + +HKDNR may modify these terms of use at any time by publishing the modified terms of use on its website. + + + + + + + diff --git a/test/target_default/unwire.hk b/test/target_default/unwire.hk new file mode 100644 index 0000000..555e6f6 --- /dev/null +++ b/test/target_default/unwire.hk @@ -0,0 +1 @@ +{"status": ["Active", "Complete"], "contacts": {"admin": {"handle": "HK3507379T", "name": "AMAZING SHREK", "firstname": "AMAZING", "country": "HK", "phone": "+852-123456", "street": "HK", "lastname": "SHREK", "organization": "UNWIRE LIMITED", "email": "mkt@bmedia.hk"}, "tech": {"name": "CHEUNG", "country": "HK", "phone": "+852-123456", "street": "HK", "lastname": "CHEUNG", "organization": "UNWIRE LIMITED", "email": "mkt@bmedia.hk"}, "registrant": {"country": "HK", "street": "HK", "organization": "UNWIRE LIMITED", "email": "mkt@bmedia.hk"}, "billing": null}, "nameservers": ["NORM.NS.CLOUDFLARE.COM", "ZOE.NS.CLOUDFLARE.COM"], "expiration_date": ["2021-06-24T00:00:00"], "creation_date": ["2009-06-24T00:00:00", "2009-06-24T00:00:00"], "raw": [" \n -------------------------------------------------------------------------------\n Whois server by HKIRC\n -------------------------------------------------------------------------------\n .hk top level Domain names can be registered via HKIRC-Accredited Registrars. \n Go to https://www.hkirc.hk/content.jsp?id=280 for details. \n -------------------------------------------------------------------------------\n\n\n\nDomain Name: UNWIRE.HK \n\nDomain Status: Active \n\nContract Version: HKDNR latest version \n\nRegistrar Name: Hong Kong Domain Name Registration Company Limited\n\nRegistrar Contact Information: Email: enquiry@hkdnr.hk Hotline: +852 2319 1313 \n\nReseller: \n\n\n\n\nRegistrant Contact Information:\n\nCompany English Name (It should be the same as the registered/corporation name on your Business Register Certificate or relevant documents): UNWIRE LIMITED\nCompany Chinese name: \nAddress: HK \nCountry: HK\nEmail: mkt@bmedia.hk \nDomain Name Commencement Date: 24-06-2009\nExpiry Date: 24-06-2021 \nRe-registration Status: Complete \n\n\n\nAdministrative Contact Information:\n\nGiven name: AMAZING \nFamily name: SHREK \nCompany name: UNWIRE LIMITED\nAddress: HK \nCountry: HK\nPhone: +852-123456\nFax: \nEmail: mkt@bmedia.hk\nAccount Name: HK3507379T\n\n\n\n\nTechnical Contact Information:\n\nFamily name: CHEUNG \nCompany name: UNWIRE LIMITED\nAddress: HK \nCountry: HK\nPhone: +852-123456\nFax: \nEmail: mkt@bmedia.hk\n\n\n\n\nName Servers Information:\n\nNORM.NS.CLOUDFLARE.COM\nZOE.NS.CLOUDFLARE.COM\n\n\n\nStatus Information:\n\nDomain Prohibit Status: \n\n\n\n -------------------------------------------------------------------------------\n The Registry contains ONLY .com.hk, .net.hk, .edu.hk, .org.hk,\n .gov.hk, idv.hk. and .hk $domains.\n -------------------------------------------------------------------------------\n\nWHOIS Terms of Use \nBy using this WHOIS search enquiry service you agree to these terms of use.\nThe data in HKDNR's WHOIS search engine is for information purposes only and HKDNR does not guarantee the accuracy of the data. The data is provided to assist people to obtain information about the registration record of domain names registered by HKDNR. You agree to use the data for lawful purposes only.\n\nYou are not authorised to use high-volume, electronic or automated processes to access, query or harvest data from this WHOIS search enquiry service.\n\nYou agree that you will not and will not allow anyone else to:\n\na. use the data for mass unsolicited commercial advertising of any sort via any medium including telephone, email or fax; or\n\nb. enable high volume, automated or electronic processes that apply to HKDNR or its computer systems including the WHOIS search enquiry service; or\n\nc. without the prior written consent of HKDNR compile, repackage, disseminate, disclose to any third party or use the data for a purpose other than obtaining information about a domain name registration record; or\n\nd. use such data to derive an economic benefit for yourself.\n\nHKDNR in its sole discretion may terminate your access to the WHOIS search enquiry service (including, without limitation, blocking your IP address) at any time including, without limitation, for excessive use of the WHOIS search enquiry service.\n\nHKDNR may modify these terms of use at any time by publishing the modified terms of use on its website.\n\n\n\n\n\n\n\n"], "registrar": ["Hong Kong Domain Name Registration Company Limited"], "emails": ["enquiry@hkdnr.hk"]} \ No newline at end of file diff --git a/test/target_normalized/unwire.hk b/test/target_normalized/unwire.hk new file mode 100644 index 0000000..bc3604a --- /dev/null +++ b/test/target_normalized/unwire.hk @@ -0,0 +1 @@ +{"status": ["Active", "Complete"], "contacts": {"admin": {"handle": "HK3507379T", "name": "Amazing Shrek", "firstname": "AMAZING", "country": "HK", "phone": "+852-123456", "street": "HK", "lastname": "SHREK", "organization": "Unwire Limited", "email": "mkt@bmedia.hk"}, "tech": {"name": "CHEUNG", "country": "HK", "phone": "+852-123456", "street": "HK", "lastname": "CHEUNG", "organization": "Unwire Limited", "email": "mkt@bmedia.hk"}, "registrant": {"country": "HK", "street": "HK", "organization": "Unwire Limited", "email": "mkt@bmedia.hk"}, "billing": null}, "nameservers": ["norm.ns.cloudflare.com", "zoe.ns.cloudflare.com"], "expiration_date": ["2021-06-24T00:00:00"], "creation_date": ["2009-06-24T00:00:00", "2009-06-24T00:00:00"], "raw": [" \n -------------------------------------------------------------------------------\n Whois server by HKIRC\n -------------------------------------------------------------------------------\n .hk top level Domain names can be registered via HKIRC-Accredited Registrars. \n Go to https://www.hkirc.hk/content.jsp?id=280 for details. \n -------------------------------------------------------------------------------\n\n\n\nDomain Name: UNWIRE.HK \n\nDomain Status: Active \n\nContract Version: HKDNR latest version \n\nRegistrar Name: Hong Kong Domain Name Registration Company Limited\n\nRegistrar Contact Information: Email: enquiry@hkdnr.hk Hotline: +852 2319 1313 \n\nReseller: \n\n\n\n\nRegistrant Contact Information:\n\nCompany English Name (It should be the same as the registered/corporation name on your Business Register Certificate or relevant documents): UNWIRE LIMITED\nCompany Chinese name: \nAddress: HK \nCountry: HK\nEmail: mkt@bmedia.hk \nDomain Name Commencement Date: 24-06-2009\nExpiry Date: 24-06-2021 \nRe-registration Status: Complete \n\n\n\nAdministrative Contact Information:\n\nGiven name: AMAZING \nFamily name: SHREK \nCompany name: UNWIRE LIMITED\nAddress: HK \nCountry: HK\nPhone: +852-123456\nFax: \nEmail: mkt@bmedia.hk\nAccount Name: HK3507379T\n\n\n\n\nTechnical Contact Information:\n\nFamily name: CHEUNG \nCompany name: UNWIRE LIMITED\nAddress: HK \nCountry: HK\nPhone: +852-123456\nFax: \nEmail: mkt@bmedia.hk\n\n\n\n\nName Servers Information:\n\nNORM.NS.CLOUDFLARE.COM\nZOE.NS.CLOUDFLARE.COM\n\n\n\nStatus Information:\n\nDomain Prohibit Status: \n\n\n\n -------------------------------------------------------------------------------\n The Registry contains ONLY .com.hk, .net.hk, .edu.hk, .org.hk,\n .gov.hk, idv.hk. and .hk $domains.\n -------------------------------------------------------------------------------\n\nWHOIS Terms of Use \nBy using this WHOIS search enquiry service you agree to these terms of use.\nThe data in HKDNR's WHOIS search engine is for information purposes only and HKDNR does not guarantee the accuracy of the data. The data is provided to assist people to obtain information about the registration record of domain names registered by HKDNR. You agree to use the data for lawful purposes only.\n\nYou are not authorised to use high-volume, electronic or automated processes to access, query or harvest data from this WHOIS search enquiry service.\n\nYou agree that you will not and will not allow anyone else to:\n\na. use the data for mass unsolicited commercial advertising of any sort via any medium including telephone, email or fax; or\n\nb. enable high volume, automated or electronic processes that apply to HKDNR or its computer systems including the WHOIS search enquiry service; or\n\nc. without the prior written consent of HKDNR compile, repackage, disseminate, disclose to any third party or use the data for a purpose other than obtaining information about a domain name registration record; or\n\nd. use such data to derive an economic benefit for yourself.\n\nHKDNR in its sole discretion may terminate your access to the WHOIS search enquiry service (including, without limitation, blocking your IP address) at any time including, without limitation, for excessive use of the WHOIS search enquiry service.\n\nHKDNR may modify these terms of use at any time by publishing the modified terms of use on its website.\n\n\n\n\n\n\n\n"], "registrar": ["Hong Kong Domain Name Registration Company Limited"], "emails": ["enquiry@hkdnr.hk"]} \ No newline at end of file