Python 3 compatibility

This includes converting one of the test data files to
UTF-8; it was Windows-encoded, which failed under
Python 3's default encoding when calling file.read().
Ziad Sawalha 10 years ago
parent 1af983ad4f
commit 1e79e33126
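
As an aside on the encoding fix the message describes: a minimal sketch of re-encoding a Windows-encoded (cp1252) file as UTF-8, using a placeholder path since the affected test data file is not named here.

    import codecs

    def convert_to_utf8(path, source_encoding="cp1252"):
        # Read the file with its original Windows encoding, then rewrite it as UTF-8
        # so Python 3's default open()/read() can decode it without errors.
        with codecs.open(path, "r", source_encoding) as f:
            text = f.read()
        with codecs.open(path, "w", "utf-8") as f:
            f.write(text)

    convert_to_utf8("test/data/example.tld")  # placeholder file name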

@@ -1,14 +1,6 @@
from __future__ import print_function
import re, sys, datetime
if sys.version_info[0] >= 3:
def iteritems(d):
return iter(d.items())
else:
def iteritems(d):
return d.iteritems()
grammar = {
"_data": {
'id': ['Domain ID:[ ]*(?P<val>.+)'],
@@ -145,14 +137,25 @@ grammar = {
}
}
if sys.version_info < (3, 0):
def is_string(data):
"""Test for string with support for python 2."""
return isinstance(data, basestring)
else:
def is_string(data):
"""Test for string with support for python 3."""
return isinstance(data, str)
def parse_raw_whois(raw_data, normalized=[]):
data = {}
raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil
for segment in raw_data:
for rule_key, rule_regexes in iteritems(grammar['_data']):
if rule_key not in data:
for rule_key, rule_regexes in grammar['_data'].items():
if (rule_key in data) == False:
for line in segment.splitlines():
for regex in rule_regexes:
result = re.search(regex, line, re.IGNORECASE)
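
The hunk above drops the iteritems() shim in favour of plain dict.items(), which exists on both Python 2 (returning a list) and Python 3 (returning a view); a small sketch of the pattern, not part of the patch, with a hypothetical grammar entry:

    grammar = {"_data": {"id": ["Domain ID:[ ]*(?P<val>.+)"]}}

    # .items() is iterable on both interpreters, so no compatibility shim is needed;
    # on Python 2 it builds a list, on Python 3 it returns a dict view.
    for rule_key, rule_regexes in grammar["_data"].items():
        print(rule_key)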
@@ -264,7 +267,7 @@ def parse_raw_whois(raw_data, normalized=[]):
except KeyError as e:
pass # Not present
for key in data.keys():
for key in list(data.keys()):
if data[key] is None or len(data[key]) == 0:
del data[key]
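
Wrapping data.keys() in list() matters on Python 3, where keys() is a live view and deleting entries while iterating over it raises a RuntimeError; a sketch with hypothetical values:

    data = {"id": None, "status": [], "emails": ["hostmaster@example.com"]}

    # list(...) takes a snapshot of the keys, so entries can be deleted safely;
    # iterating data.keys() directly would raise RuntimeError on Python 3.
    for key in list(data.keys()):
        if data[key] is None or len(data[key]) == 0:
            del data[key]

    print(data)  # {'emails': ['hostmaster@example.com']}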
@@ -278,23 +281,23 @@ def parse_raw_whois(raw_data, normalized=[]):
def normalize_data(data, normalized):
for key in ("nameservers", "emails", "whois_server"):
if key in data and data[key] is not None and (normalized == True or key in normalized):
if isinstance(data[key], basestring):
if is_string(data[key]):
data[key] = data[key].lower()
else:
data[key] = [item.lower() for item in data[key]]
for key, threshold in (("registrar", 4), ("status", 3)):
if key in data and data[key] is not None and (normalized == True or key in normalized):
if isinstance(data[key], basestring):
if is_string(data[key]):
data[key] = normalize_name(data[key], abbreviation_threshold=threshold, length_threshold=1)
else:
data[key] = [normalize_name(item, abbreviation_threshold=threshold, length_threshold=1) for item in data[key]]
for contact_type, contact in iteritems(data['contacts']):
for contact_type, contact in data['contacts'].items():
if contact is not None:
for key in ("email",):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
if isinstance(contact[key], basestring):
if isinstance(contact[key], str):
contact[key] = contact[key].lower()
else:
contact[key] = [item.lower() for item in contact[key]]
@@ -307,7 +310,7 @@ def normalize_data(data, normalized):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3)
for key in contact.keys():
for key in list(contact.keys()):
try:
contact[key] = contact[key].strip(", ")
except AttributeError as e:
@@ -635,7 +638,7 @@ def parse_registrants(data):
# Post-processing
for obj in (registrant, tech_contact, billing_contact, admin_contact):
if obj is not None:
for key in obj.keys():
for key in list(obj.keys()):
if obj[key] is None or obj[key].strip() == "": # Just chomp all surrounding whitespace
del obj[key]
else:

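The is_string() helper introduced above replaces the Python 2-only basestring check, so normalize_data can keep one branch for a single string and one for a list of strings; a usage sketch with hypothetical nameserver values, using isinstance(..., str) directly as the Python 3 branch does:

    def lower_values(value):
        # Mirrors normalize_data's branch: lowercase one string, or each string in a list.
        if isinstance(value, str):  # is_string(value) in the patched module
            return value.lower()
        return [item.lower() for item in value]

    print(lower_values("NS1.EXAMPLE.COM"))                       # ns1.example.com
    print(lower_values(["NS1.EXAMPLE.COM", "NS2.EXAMPLE.COM"]))  # ['ns1.example.com', 'ns2.example.com']
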
@@ -1,6 +1,27 @@
#!/usr/bin/env python2
import sys, argparse, os, pythonwhois, json, datetime
import sys, argparse, os, pythonwhois, json, datetime, codecs
import pkgutil
import encodings
def get_codecs():
"""Dynamically get list of codecs in python."""
false_positives = set(["aliases"])
found = set(name for imp, name, ispkg in pkgutil.iter_modules(encodings.__path__) if not ispkg)
found.difference_update(false_positives)
return found
def read_encoded_file(file_path):
"""Try reading file using all codecs. Return the first succesfull one."""
for encoding in get_codecs():
try:
with codecs.open(file_path, "r", encoding) as f:
return f.read()
except Exception:
pass
parser = argparse.ArgumentParser(description="Runs or modifies the test suite for python-whois.")
parser.add_argument("mode", nargs=1, choices=["run", "update"], default="run", help="Whether to run or update the tests. Only update if you know what you're doing!")
@@ -14,7 +35,7 @@ ENDC = '\033[0m'
def encoded_json_dumps(obj):
try:
return json.dumps(obj, default=json_fallback)
except UnicodeDecodeError, e:
except UnicodeDecodeError as e:
return json.dumps(recursive_encode(obj, "latin-1"), default=json_fallback)
def json_fallback(obj):
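
The comma form, except UnicodeDecodeError, e:, is Python 2-only syntax and a SyntaxError on Python 3; the as form parses on Python 2.6+ and Python 3 alike. A minimal illustration:

    try:
        int("not a number")
    except ValueError as e:  # the "as" form works on Python 2.6+ and Python 3
        print(e)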
@@ -24,7 +45,7 @@ def json_fallback(obj):
return obj
def recursive_encode(obj, encoding):
for key in obj.keys():
for key in list(obj.keys()):
if isinstance(obj[key], dict):
obj[key] = recursive_encode(obj[key], encoding)
elif isinstance(obj[key], list):
@@ -74,18 +95,26 @@ if args.mode[0] == "run":
suites = []
for target in targets:
try:
with open(os.path.join("test/data", target), "r") as f:
with codecs.open(os.path.join("test/data", target), "r") as f:
data = f.read().split("\n--\n")
except IOError, e:
except IOError as e:
sys.stderr.write("Invalid domain %(domain)s specified. No test case or base data exists.\n" % {"domain": target})
errors = True
continue
except UnicodeDecodeError:
try:
# Try cp1252 (ufpa.br uses that)
with codecs.open(os.path.join("test/data", target), "r", 'cp1252') as f:
data = f.read().split("\n--\n")
except UnicodeDecodeError as e:
# Fall back to trying all registered codecs
data = read_encoded_file(os.path.join("test/data", target)).split("\n--\n")
try:
with open(os.path.join("test/target_default", target), "r") as f:
with codecs.open(os.path.join("test/target_default", target), "r") as f:
default = f.read()
with open(os.path.join("test/target_normalized", target), "r") as f:
with codecs.open(os.path.join("test/target_normalized", target), "r") as f:
normalized = f.read()
except IOError, e:
except IOError as e:
sys.stderr.write("Missing target data for domain %(domain)s. Run `./test.py update %(domain)s` to correct this, after verifying that pythonwhois can correctly parse this particular domain.\n" % {"domain": target})
errors = True
continue
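
The run path now degrades gracefully: the file is first read with the default encoding, retried as cp1252 (the encoding the ufpa.br test data uses), and finally handed to read_encoded_file() to probe every available codec. A minimal illustration of why the first attempt can fail, using Python 3 semantics:

    windows_bytes = "é".encode("cp1252")       # b'\xe9'
    try:
        windows_bytes.decode("utf-8")          # raises UnicodeDecodeError
    except UnicodeDecodeError:
        print(windows_bytes.decode("cp1252"))  # 'é'; the cp1252 fallback succeeds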
@@ -152,10 +181,10 @@ elif args.mode[0] == "update":
updates = []
for target in targets:
try:
with open(os.path.join("test/data", target), "r") as f:
with codecs.open(os.path.join("test/data", target), "r") as f:
data = f.read().split("\n--\n")
updates.append((target, data))
except IOError, e:
except IOError as e:
sys.stderr.write("Invalid domain %(domain)s specified. No base data exists.\n" % {"domain": target})
errors = True
continue
@@ -166,8 +195,8 @@ elif args.mode[0] == "update":
for target, data in updates:
default = pythonwhois.parse.parse_raw_whois(data)
normalized = pythonwhois.parse.parse_raw_whois(data, normalized=True)
with open(os.path.join("test/target_default", target), "w") as f:
with codecs.open(os.path.join("test/target_default", target), "w") as f:
f.write(encoded_json_dumps(default))
with open(os.path.join("test/target_normalized", target), "w") as f:
with codecs.open(os.path.join("test/target_normalized", target), "w") as f:
f.write(encoded_json_dumps(normalized))
print "Generated target data for %s." % target
print("Generated target data for %s." % target)
