2.1.0 release with Python 3 support

11 years ago · 59c031acde
parent 488ceed672 9b16e99582
commit 59c031acde
6 changed files with 157 additions and 103 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,4 @@
 build
 dist
 *.egg-info
+.tox
--- a/README.md
+++ b/README.md
@ -69,7 +69,7 @@ Make sure to verify (using `pwhois` or otherwise) that the WHOIS data for the do

 	./test.py update thedomain.com
 	
-### Running the full test suite
+### Running all tests

 	./test.py run all
 	
@ -77,6 +77,10 @@ Make sure to verify (using `pwhois` or otherwise) that the WHOIS data for the do

 	./test.py run thedomain.com

+### Running the full test suite, including support for multiple Python versions
+
+    tox
+
 ### Generating documentation

 You need [ZippyDoc](http://cryto.net/zippydoc) (which can be installed through `pip install zippydoc`).
--- a/pythonwhois/parse.py
+++ b/pythonwhois/parse.py
@ -1,4 +1,5 @@
-import re, datetime
+from __future__ import print_function
+import re, sys, datetime

 grammar = {
 	"_data": {
@ -136,14 +137,25 @@ grammar = {
 	}
 }

+
+if sys.version_info < (3, 0):
+	def is_string(data):
+		"""Test for string with support for Python 2."""
+		return isinstance(data, basestring)
+else:
+	def is_string(data):
+		"""Test for string with support for Python 3."""
+		return isinstance(data, str)
+
+
 def parse_raw_whois(raw_data, normalized=[]):
 	data = {}

 	raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil

 	for segment in raw_data:
-		for rule_key, rule_regexes in grammar['_data'].iteritems():
-			if data.has_key(rule_key) == False:
+		for rule_key, rule_regexes in grammar['_data'].items():
+			if (rule_key in data) == False:
 				for line in segment.splitlines():
 					for regex in rule_regexes:
 						result = re.search(regex, line, re.IGNORECASE)
@ -153,7 +165,7 @@ def parse_raw_whois(raw_data, normalized=[]):
 							if val != "":
 								try:
 									data[rule_key].append(val)
-								except KeyError, e:
+								except KeyError as e:
 									data[rule_key] = [val]

 		# Whois.com is a bit special... Fabulous.com also seems to use this format.
@ -163,7 +175,7 @@ def parse_raw_whois(raw_data, normalized=[]):
 			for match in re.findall("[ ]+(.+)\n", chunk):
 				try:
 					data["nameservers"].append(match.strip())
-				except KeyError, e:
+				except KeyError as e:
 					data["nameservers"] = [match.strip()]
 		# Nominet also needs some special attention
 		match = re.search("    Registrar:\n        (.+)\n", segment)
@ -176,7 +188,7 @@ def parse_raw_whois(raw_data, normalized=[]):
 				match = match.split()[0]
 				try:
 					data["nameservers"].append(match.strip())
-				except KeyError, e:
+				except KeyError as e:
 					data["nameservers"] = [match.strip()]
 		# .am plays the same game
 		match = re.search("   DNS servers:([\s\S]*?\n)\n", segment)
@ -186,7 +198,7 @@ def parse_raw_whois(raw_data, normalized=[]):
 				match = match.split()[0]
 				try:
 					data["nameservers"].append(match.strip())
-				except KeyError, e:
+				except KeyError as e:
 					data["nameservers"] = [match.strip()]
 		# SIDN isn't very standard either.
 		match = re.search("Registrar:\n\s+(\S.*)", segment)
@ -199,7 +211,7 @@ def parse_raw_whois(raw_data, normalized=[]):
 				match = match.split()[0]
 				try:
 					data["nameservers"].append(match.strip())
-				except KeyError, e:
+				except KeyError as e:
 					data["nameservers"] = [match.strip()]
 		# The .ie WHOIS server puts ambiguous status information in an unhelpful order
 		match = re.search('ren-status:\s*(.+)', segment)
@ -212,34 +224,34 @@ def parse_raw_whois(raw_data, normalized=[]):
 	try:
 		data['expiration_date'] = remove_duplicates(data['expiration_date'])
 		data['expiration_date'] = parse_dates(data['expiration_date'])
-	except KeyError, e:
+	except KeyError as e:
 		pass # Not present

 	try:
 		data['creation_date'] = remove_duplicates(data['creation_date'])
 		data['creation_date'] = parse_dates(data['creation_date'])
-	except KeyError, e:
+	except KeyError as e:
 		pass # Not present

 	try:
 		data['updated_date'] = remove_duplicates(data['updated_date'])
 		data['updated_date'] = parse_dates(data['updated_date'])
-	except KeyError, e:
+	except KeyError as e:
 		pass # Not present

 	try:
 		data['nameservers'] = remove_duplicates([ns.rstrip(".") for ns in data['nameservers']])
-	except KeyError, e:
+	except KeyError as e:
 		pass # Not present

 	try:
 		data['emails'] = remove_duplicates(data['emails'])
-	except KeyError, e:
+	except KeyError as e:
 		pass # Not present

 	try:
 		data['registrar'] = remove_duplicates(data['registrar'])
-	except KeyError, e:
+	except KeyError as e:
 		pass # Not present

 	# Remove e-mail addresses if they are already listed for any of the contacts
@ -248,14 +260,14 @@ def parse_raw_whois(raw_data, normalized=[]):
 		if data["contacts"][contact] is not None:
 			try:
 				known_emails.append(data["contacts"][contact]["email"])
-			except KeyError, e:
+			except KeyError as e:
 				pass # No e-mail recorded for this contact...
 	try:
 		data['emails'] = [email for email in data["emails"] if email not in known_emails]
-	except KeyError, e:
+	except KeyError as e:
 		pass # Not present

-	for key in data.keys():
+	for key in list(data.keys()):
 		if data[key] is None or len(data[key]) == 0:
 			del data[key]

@ -269,23 +281,23 @@ def parse_raw_whois(raw_data, normalized=[]):
 def normalize_data(data, normalized):
 	for key in ("nameservers", "emails", "whois_server"):
 		if key in data and data[key] is not None and (normalized == True or key in normalized):
-			if isinstance(data[key], basestring):
+			if is_string(data[key]):
 				data[key] = data[key].lower()
 			else:
 				data[key] = [item.lower() for item in data[key]]

 	for key, threshold in (("registrar", 4), ("status", 3)):
 		if key in data and data[key] is not None and (normalized == True or key in normalized):
-			if isinstance(data[key], basestring):
+			if is_string(data[key]):
 				data[key] = normalize_name(data[key], abbreviation_threshold=threshold, length_threshold=1)
 			else:
 				data[key] = [normalize_name(item, abbreviation_threshold=threshold, length_threshold=1) for item in data[key]]

-	for contact_type, contact in data['contacts'].iteritems():
+	for contact_type, contact in data['contacts'].items():
 		if contact is not None:
 			for key in ("email",):
 				if key in contact and contact[key] is not None and (normalized == True or key in normalized):
-					if isinstance(contact[key], basestring):
+					if isinstance(contact[key], str):
 						contact[key] = contact[key].lower()
 					else:
 						contact[key] = [item.lower() for item in contact[key]]
@ -298,10 +310,10 @@ def normalize_data(data, normalized):
 				if key in contact and contact[key] is not None and (normalized == True or key in normalized):
 					contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3)

-			for key in contact.keys():
+			for key in list(contact.keys()):
 				try:
 					contact[key] = contact[key].strip(", ")
-				except AttributeError, e:
+				except AttributeError as e:
 					pass # Not a string
 	return data

@ -368,37 +380,37 @@ def parse_dates(dates):
 					# This will require some more guesswork - some WHOIS servers present the name of the month
 					try:
 						month = int(result.group("month"))
-					except ValueError, e:
+					except ValueError as e:
 						# Apparently not a number. Look up the corresponding number.
 						try:
 							month = grammar['_months'][result.group("month").lower()]
-						except KeyError, e:
+						except KeyError as e:
 							# Unknown month name, default to 0
 							month = 0

 					try:
 						hour = int(result.group("hour"))
-					except IndexError, e:
+					except IndexError as e:
 						hour = 0
-					except TypeError, e:
+					except TypeError as e:
 						hour = 0

 					try:
 						minute = int(result.group("minute"))
-					except IndexError, e:
+					except IndexError as e:
 						minute = 0
-					except TypeError, e:
+					except TypeError as e:
 						minute = 0

 					try:
 						second = int(result.group("second"))
-					except IndexError, e:
+					except IndexError as e:
 						second = 0
-					except TypeError, e:
+					except TypeError as e:
 						second = 0

 					break
-				except ValueError, e:
+				except ValueError as e:
 					# Something went horribly wrong, maybe there is no valid date present?
 					year = 0
 					month = 0
@ -406,16 +418,16 @@ def parse_dates(dates):
 					hour = 0
 					minute = 0
 					second = 0
-					print e.message
+					print(e.message)
 		try:
 			if year > 0:
 				try:
 					parsed_dates.append(datetime.datetime(year, month, day, hour, minute, second))
-				except ValueError, e:
+				except ValueError as e:
 					# We might have gotten the day and month the wrong way around, let's try it the other way around
 					# If you're not using an ISO-standard date format, you're an evil registrar!
 					parsed_dates.append(datetime.datetime(year, day, month, hour, minute, second))
-		except UnboundLocalError, e:
+		except UnboundLocalError as e:
 			pass

 	if len(parsed_dates) > 0:
@ -626,7 +638,7 @@ def parse_registrants(data):
 	# Post-processing
 	for obj in (registrant, tech_contact, billing_contact, admin_contact):
 		if obj is not None:
-			for key in obj.keys():
+			for key in list(obj.keys()):
 				if obj[key] is None or obj[key].strip() == "": # Just chomp all surrounding whitespace
 					del obj[key]
 				else:
@ -642,7 +654,7 @@ def parse_registrants(data):
 					try:
 						street_items.append(obj["street%d" % i])
 						del obj["street%d" % i]
-					except KeyError, e:
+					except KeyError as e:
 						break
 					i += 1
 				obj["street"] = "\n".join(street_items)
--- a/setup.py
+++ b/setup.py
@ -1,12 +1,13 @@
 from setuptools import setup

 setup(name='pythonwhois',
-      version='2.0.5',
+      version='2.1.0',
      description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. No dependencies.',
      author='Sven Slootweg',
      author_email='pythonwhois@cryto.net',
      url='http://cryto.net/pythonwhois',
      packages=['pythonwhois'],
+      install_requires=['argparse'],
      provides=['pythonwhois'],
      scripts=["pwhois"],
      license="WTFPL"
--- a/test.py
+++ b/test.py
@ -1,6 +1,27 @@
 #!/usr/bin/env python2

-import sys, argparse, os, pythonwhois, json, datetime
+import sys, argparse, os, pythonwhois, json, datetime, codecs
+import pkgutil
+import encodings
+
+
+def get_codecs():
+	"""Dynamically get list of codecs in python."""
+	false_positives = set(["aliases"])
+	found = set(name for imp, name, ispkg in pkgutil.iter_modules(encodings.__path__) if not ispkg)
+	found.difference_update(false_positives)
+	return found
+
+
+def read_encoded_file(file_path):
+	"""Try reading the file with each available codec. Return the contents from the first successful one."""
+	for encoding in get_codecs():
+		try:
+			with codecs.open(file_path, "r", encoding) as f:
+				return f.read()
+		except Exception:
+			pass
+

 parser = argparse.ArgumentParser(description="Runs or modifies the test suite for python-whois.")
 parser.add_argument("mode", nargs=1, choices=["run", "update"], default="run", help="Whether to run or update the tests. Only update if you know what you're doing!")
@ -14,7 +35,7 @@ ENDC = '\033[0m'
 def encoded_json_dumps(obj):
 	try:
 		return json.dumps(obj, default=json_fallback)
-	except UnicodeDecodeError, e:
+	except UnicodeDecodeError as e:
 		return json.dumps(recursive_encode(obj, "latin-1"), default=json_fallback)

 def json_fallback(obj):
@ -24,7 +45,7 @@ def json_fallback(obj):
 		return obj

 def recursive_encode(obj, encoding):
-	for key in obj.keys():
+	for key in list(obj.keys()):
 		if isinstance(obj[key], dict):
 			obj[key] = recursive_encode(obj[key], encoding)
 		elif isinstance(obj[key], list):
@ -74,18 +95,26 @@ if args.mode[0] == "run":
 	suites = []
 	for target in targets:
 		try:
-			with open(os.path.join("test/data", target), "r") as f:
+			with codecs.open(os.path.join("test/data", target), "r") as f:
 				data = f.read().split("\n--\n")
-		except IOError, e:
+		except IOError as e:
 			sys.stderr.write("Invalid domain %(domain)s specified. No test case or base data exists.\n" % {"domain": target})
 			errors = True
 			continue
+		except UnicodeDecodeError:
+			try:
+				# Try cp1252 (ufpa.br uses that)
+				with codecs.open(os.path.join("test/data", target), "r", 'cp1252') as f:
+					data = f.read().split("\n--\n")
+			except UnicodeDecodeError as e:
+				# Fall back to trying all registered codecs
+				data = read_encoded_file(os.path.join("test/data", target)).split("\n--\n")
 		try:
-			with open(os.path.join("test/target_default", target), "r") as f:
+			with codecs.open(os.path.join("test/target_default", target), "r") as f:
 				default = f.read()
-			with open(os.path.join("test/target_normalized", target), "r") as f:
+			with codecs.open(os.path.join("test/target_normalized", target), "r") as f:
 				normalized = f.read()
-		except IOError, e:
+		except IOError as e:
 			sys.stderr.write("Missing target data for domain %(domain)s. Run `./test.py update %(domain)s` to correct this, after verifying that pythonwhois can correctly parse this particular domain.\n" % {"domain": target})
 			errors = True
 			continue
@ -152,10 +181,10 @@ elif args.mode[0] == "update":
 	updates = []
 	for target in targets:
 		try:
-			with open(os.path.join("test/data", target), "r") as f:
+			with codecs.open(os.path.join("test/data", target), "r") as f:
 				data = f.read().split("\n--\n")
 			updates.append((target, data))
-		except IOError, e:
+		except IOError as e:
 			sys.stderr.write("Invalid domain %(domain)s specified. No base data exists.\n" % {"domain": target})
 			errors = True
 			continue
@ -166,8 +195,8 @@ elif args.mode[0] == "update":
 	for target, data in updates:
 		default = pythonwhois.parse.parse_raw_whois(data)
 		normalized = pythonwhois.parse.parse_raw_whois(data, normalized=True)
-		with open(os.path.join("test/target_default", target), "w") as f:
+		with codecs.open(os.path.join("test/target_default", target), "w") as f:
 			f.write(encoded_json_dumps(default))
-		with open(os.path.join("test/target_normalized", target), "w") as f:
+		with codecs.open(os.path.join("test/target_normalized", target), "w") as f:
 			f.write(encoded_json_dumps(normalized))
-		print "Generated target data for %s." % target
+		print("Generated target data for %s." % target)
--- a/tox.ini
+++ b/tox.ini
@ -0,0 +1,7 @@
+[tox]
+envlist = py26,py27,py33
+
+[testenv]
+usedevelop = True
+setenv = VIRTUAL_ENV={envdir}
+commands = python test.py run all