2.1.0 release with Python 3 support

master
Sven Slootweg 11 years ago
commit 59c031acde

1
.gitignore vendored

@ -2,3 +2,4 @@
build
dist
*.egg-info
.tox

@ -69,7 +69,7 @@ Make sure to verify (using `pwhois` or otherwise) that the WHOIS data for the do
./test.py update thedomain.com
### Running the full test suite
### Running all tests
./test.py run all
@ -77,6 +77,10 @@ Make sure to verify (using `pwhois` or otherwise) that the WHOIS data for the do
./test.py run thedomain.com
### Running the full test suite against multiple Python versions
tox
### Generating documentation
You need [ZippyDoc](http://cryto.net/zippydoc) (which can be installed through `pip install zippydoc`).

@ -1,4 +1,5 @@
import re, datetime
from __future__ import print_function
import re, sys, datetime
grammar = {
"_data": {
@ -136,14 +137,25 @@ grammar = {
}
}
# Select the string base type once, at import time, for the running
# interpreter: Python 2 has `basestring`, Python 3 only has `str`.
if sys.version_info >= (3, 0):
    _string_types = (str,)
else:
    _string_types = (basestring,)  # noqa: F821 -- only evaluated on Python 2


def is_string(data):
    """Tell whether *data* is a string on both Python 2 and Python 3.

    Returns True for `basestring` instances on Python 2 and for `str`
    instances on Python 3; False for anything else.
    """
    return isinstance(data, _string_types)
def parse_raw_whois(raw_data, normalized=[]):
data = {}
raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil
for segment in raw_data:
for rule_key, rule_regexes in grammar['_data'].iteritems():
if data.has_key(rule_key) == False:
for rule_key, rule_regexes in grammar['_data'].items():
if (rule_key in data) == False:
for line in segment.splitlines():
for regex in rule_regexes:
result = re.search(regex, line, re.IGNORECASE)
@ -153,7 +165,7 @@ def parse_raw_whois(raw_data, normalized=[]):
if val != "":
try:
data[rule_key].append(val)
except KeyError, e:
except KeyError as e:
data[rule_key] = [val]
# Whois.com is a bit special... Fabulous.com also seems to use this format.
@ -163,7 +175,7 @@ def parse_raw_whois(raw_data, normalized=[]):
for match in re.findall("[ ]+(.+)\n", chunk):
try:
data["nameservers"].append(match.strip())
except KeyError, e:
except KeyError as e:
data["nameservers"] = [match.strip()]
# Nominet also needs some special attention
match = re.search(" Registrar:\n (.+)\n", segment)
@ -176,7 +188,7 @@ def parse_raw_whois(raw_data, normalized=[]):
match = match.split()[0]
try:
data["nameservers"].append(match.strip())
except KeyError, e:
except KeyError as e:
data["nameservers"] = [match.strip()]
# .am plays the same game
match = re.search(" DNS servers:([\s\S]*?\n)\n", segment)
@ -186,7 +198,7 @@ def parse_raw_whois(raw_data, normalized=[]):
match = match.split()[0]
try:
data["nameservers"].append(match.strip())
except KeyError, e:
except KeyError as e:
data["nameservers"] = [match.strip()]
# SIDN isn't very standard either.
match = re.search("Registrar:\n\s+(\S.*)", segment)
@ -199,7 +211,7 @@ def parse_raw_whois(raw_data, normalized=[]):
match = match.split()[0]
try:
data["nameservers"].append(match.strip())
except KeyError, e:
except KeyError as e:
data["nameservers"] = [match.strip()]
# The .ie WHOIS server puts ambiguous status information in an unhelpful order
match = re.search('ren-status:\s*(.+)', segment)
@ -212,34 +224,34 @@ def parse_raw_whois(raw_data, normalized=[]):
try:
data['expiration_date'] = remove_duplicates(data['expiration_date'])
data['expiration_date'] = parse_dates(data['expiration_date'])
except KeyError, e:
except KeyError as e:
pass # Not present
try:
data['creation_date'] = remove_duplicates(data['creation_date'])
data['creation_date'] = parse_dates(data['creation_date'])
except KeyError, e:
except KeyError as e:
pass # Not present
try:
data['updated_date'] = remove_duplicates(data['updated_date'])
data['updated_date'] = parse_dates(data['updated_date'])
except KeyError, e:
except KeyError as e:
pass # Not present
try:
data['nameservers'] = remove_duplicates([ns.rstrip(".") for ns in data['nameservers']])
except KeyError, e:
except KeyError as e:
pass # Not present
try:
data['emails'] = remove_duplicates(data['emails'])
except KeyError, e:
except KeyError as e:
pass # Not present
try:
data['registrar'] = remove_duplicates(data['registrar'])
except KeyError, e:
except KeyError as e:
pass # Not present
# Remove e-mail addresses if they are already listed for any of the contacts
@ -248,14 +260,14 @@ def parse_raw_whois(raw_data, normalized=[]):
if data["contacts"][contact] is not None:
try:
known_emails.append(data["contacts"][contact]["email"])
except KeyError, e:
except KeyError as e:
pass # No e-mail recorded for this contact...
try:
data['emails'] = [email for email in data["emails"] if email not in known_emails]
except KeyError, e:
except KeyError as e:
pass # Not present
for key in data.keys():
for key in list(data.keys()):
if data[key] is None or len(data[key]) == 0:
del data[key]
@ -269,23 +281,23 @@ def parse_raw_whois(raw_data, normalized=[]):
def normalize_data(data, normalized):
for key in ("nameservers", "emails", "whois_server"):
if key in data and data[key] is not None and (normalized == True or key in normalized):
if isinstance(data[key], basestring):
if is_string(data[key]):
data[key] = data[key].lower()
else:
data[key] = [item.lower() for item in data[key]]
for key, threshold in (("registrar", 4), ("status", 3)):
if key in data and data[key] is not None and (normalized == True or key in normalized):
if isinstance(data[key], basestring):
if is_string(data[key]):
data[key] = normalize_name(data[key], abbreviation_threshold=threshold, length_threshold=1)
else:
data[key] = [normalize_name(item, abbreviation_threshold=threshold, length_threshold=1) for item in data[key]]
for contact_type, contact in data['contacts'].iteritems():
for contact_type, contact in data['contacts'].items():
if contact is not None:
for key in ("email",):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
if isinstance(contact[key], basestring):
if isinstance(contact[key], str):
contact[key] = contact[key].lower()
else:
contact[key] = [item.lower() for item in contact[key]]
@ -298,10 +310,10 @@ def normalize_data(data, normalized):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3)
for key in contact.keys():
for key in list(contact.keys()):
try:
contact[key] = contact[key].strip(", ")
except AttributeError, e:
except AttributeError as e:
pass # Not a string
return data
@ -368,37 +380,37 @@ def parse_dates(dates):
# This will require some more guesswork - some WHOIS servers present the name of the month
try:
month = int(result.group("month"))
except ValueError, e:
except ValueError as e:
# Apparently not a number. Look up the corresponding number.
try:
month = grammar['_months'][result.group("month").lower()]
except KeyError, e:
except KeyError as e:
# Unknown month name, default to 0
month = 0
try:
hour = int(result.group("hour"))
except IndexError, e:
except IndexError as e:
hour = 0
except TypeError, e:
except TypeError as e:
hour = 0
try:
minute = int(result.group("minute"))
except IndexError, e:
except IndexError as e:
minute = 0
except TypeError, e:
except TypeError as e:
minute = 0
try:
second = int(result.group("second"))
except IndexError, e:
except IndexError as e:
second = 0
except TypeError, e:
except TypeError as e:
second = 0
break
except ValueError, e:
except ValueError as e:
# Something went horribly wrong, maybe there is no valid date present?
year = 0
month = 0
@ -406,16 +418,16 @@ def parse_dates(dates):
hour = 0
minute = 0
second = 0
print e.message
print(e.message)
try:
if year > 0:
try:
parsed_dates.append(datetime.datetime(year, month, day, hour, minute, second))
except ValueError, e:
except ValueError as e:
# We might have gotten the day and month the wrong way around, let's try it the other way around
# If you're not using an ISO-standard date format, you're an evil registrar!
parsed_dates.append(datetime.datetime(year, day, month, hour, minute, second))
except UnboundLocalError, e:
except UnboundLocalError as e:
pass
if len(parsed_dates) > 0:
@ -626,7 +638,7 @@ def parse_registrants(data):
# Post-processing
for obj in (registrant, tech_contact, billing_contact, admin_contact):
if obj is not None:
for key in obj.keys():
for key in list(obj.keys()):
if obj[key] is None or obj[key].strip() == "": # Just chomp all surrounding whitespace
del obj[key]
else:
@ -642,7 +654,7 @@ def parse_registrants(data):
try:
street_items.append(obj["street%d" % i])
del obj["street%d" % i]
except KeyError, e:
except KeyError as e:
break
i += 1
obj["street"] = "\n".join(street_items)

@ -1,12 +1,13 @@
from setuptools import setup
setup(name='pythonwhois',
version='2.0.5',
version='2.1.0',
description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. No dependencies.',
author='Sven Slootweg',
author_email='pythonwhois@cryto.net',
url='http://cryto.net/pythonwhois',
packages=['pythonwhois'],
install_requires=['argparse'],
provides=['pythonwhois'],
scripts=["pwhois"],
license="WTFPL"

@ -1,6 +1,27 @@
#!/usr/bin/env python2
import sys, argparse, os, pythonwhois, json, datetime
import sys, argparse, os, pythonwhois, json, datetime, codecs
import pkgutil
import encodings
def get_codecs():
    """Return the names of the codec modules found in the ``encodings`` package.

    The names are discovered at runtime by listing the non-package modules
    of ``encodings``. The ``aliases`` module is removed from the result
    because it is listed there without being a codec itself.

    Returns:
        A set of module names (for example ``"utf_8"`` or ``"cp1252"``).
    """
    false_positives = set(["aliases"])
    found = set(
        name
        for _importer, name, ispkg in pkgutil.iter_modules(encodings.__path__)
        if not ispkg
    )
    found.difference_update(false_positives)
    return found


def read_encoded_file(file_path):
    """Decode a file with the first codec from ``get_codecs()`` that accepts it.

    Codecs are tried in sorted name order, so the same file always yields
    the same result. This is a last-resort fallback: a permissive codec may
    accept the bytes without being the encoding the file was written in.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file contents, or None if no codec could decode them.

    Raises:
        IOError / OSError: If the file itself cannot be opened or read.
    """
    # Read the raw bytes once instead of reopening the file for every codec.
    # This also keeps real I/O failures from being mistaken for decode
    # failures and silently swallowed.
    with open(file_path, "rb") as f:
        raw = f.read()

    # Sorted on purpose: iterating the set directly gives an order that may
    # differ between runs, which made the "first successful" codec arbitrary.
    for encoding in sorted(get_codecs()):
        try:
            return raw.decode(encoding)
        except Exception:
            # Deliberately broad: the candidates include codecs that are
            # unavailable on this platform or are not text encodings, and
            # they fail with differing exception types. Any failure just
            # means "try the next candidate".
            continue

    # Nothing could decode the data.
    return None
parser = argparse.ArgumentParser(description="Runs or modifies the test suite for python-whois.")
parser.add_argument("mode", nargs=1, choices=["run", "update"], default="run", help="Whether to run or update the tests. Only update if you know what you're doing!")
@ -14,7 +35,7 @@ ENDC = '\033[0m'
def encoded_json_dumps(obj):
try:
return json.dumps(obj, default=json_fallback)
except UnicodeDecodeError, e:
except UnicodeDecodeError as e:
return json.dumps(recursive_encode(obj, "latin-1"), default=json_fallback)
def json_fallback(obj):
@ -24,7 +45,7 @@ def json_fallback(obj):
return obj
def recursive_encode(obj, encoding):
for key in obj.keys():
for key in list(obj.keys()):
if isinstance(obj[key], dict):
obj[key] = recursive_encode(obj[key], encoding)
elif isinstance(obj[key], list):
@ -74,18 +95,26 @@ if args.mode[0] == "run":
suites = []
for target in targets:
try:
with open(os.path.join("test/data", target), "r") as f:
with codecs.open(os.path.join("test/data", target), "r") as f:
data = f.read().split("\n--\n")
except IOError, e:
except IOError as e:
sys.stderr.write("Invalid domain %(domain)s specified. No test case or base data exists.\n" % {"domain": target})
errors = True
continue
except UnicodeDecodeError:
try:
# Try cp1252 (ufpa.br uses that)
with codecs.open(os.path.join("test/data", target), "r", 'cp1252') as f:
data = f.read().split("\n--\n")
except UnicodeDecodeError as e:
# Fall back to trying all registered codecs
data = read_encoded_file(os.path.join("test/data", target)).split("\n--\n")
try:
with open(os.path.join("test/target_default", target), "r") as f:
with codecs.open(os.path.join("test/target_default", target), "r") as f:
default = f.read()
with open(os.path.join("test/target_normalized", target), "r") as f:
with codecs.open(os.path.join("test/target_normalized", target), "r") as f:
normalized = f.read()
except IOError, e:
except IOError as e:
sys.stderr.write("Missing target data for domain %(domain)s. Run `./test.py update %(domain)s` to correct this, after verifying that pythonwhois can correctly parse this particular domain.\n" % {"domain": target})
errors = True
continue
@ -152,10 +181,10 @@ elif args.mode[0] == "update":
updates = []
for target in targets:
try:
with open(os.path.join("test/data", target), "r") as f:
with codecs.open(os.path.join("test/data", target), "r") as f:
data = f.read().split("\n--\n")
updates.append((target, data))
except IOError, e:
except IOError as e:
sys.stderr.write("Invalid domain %(domain)s specified. No base data exists.\n" % {"domain": target})
errors = True
continue
@ -166,8 +195,8 @@ elif args.mode[0] == "update":
for target, data in updates:
default = pythonwhois.parse.parse_raw_whois(data)
normalized = pythonwhois.parse.parse_raw_whois(data, normalized=True)
with open(os.path.join("test/target_default", target), "w") as f:
with codecs.open(os.path.join("test/target_default", target), "w") as f:
f.write(encoded_json_dumps(default))
with open(os.path.join("test/target_normalized", target), "w") as f:
with codecs.open(os.path.join("test/target_normalized", target), "w") as f:
f.write(encoded_json_dumps(normalized))
print "Generated target data for %s." % target
print("Generated target data for %s." % target)

@ -0,0 +1,7 @@
[tox]
envlist = py26,py27,py33
[testenv]
usedevelop = True
setenv = VIRTUAL_ENV={envdir}
commands = python test.py run all
Loading…
Cancel
Save