From c09068f0a21963811beda79c045de0aeb773f9cc Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Fri, 27 Jun 2014 23:17:51 +0200 Subject: [PATCH] Unicode fixes for `net` and `pwhois`. Fixes #26. --- pwhois | 24 ++++++++++++------------ pythonwhois/net.py | 15 +++++++++------ pythonwhois/parse.py | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/pwhois b/pwhois index eb406fd..2ec86d9 100755 --- a/pwhois +++ b/pwhois @@ -3,7 +3,7 @@ import argparse, pythonwhois, json, datetime try: from collections import OrderedDict -except ImportError, e: +except ImportError as e: from ordereddict import OrderedDict parser = argparse.ArgumentParser(description="Retrieves and parses WHOIS data for a domain name.") @@ -27,7 +27,7 @@ else: data = f.read().split("\n--\n") if args.raw == True: - print "\n--\n".join(data) + print("\n--\n".join([x.encode("utf-8") for x in data])) else: if len(server_list) > 0: parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True, never_query_handles=False, handle_server=server_list[-1]) @@ -35,7 +35,7 @@ else: parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True) if args.json == True: - print json.dumps(parsed, default=json_fallback) + print(json.dumps(parsed, default=json_fallback)) else: data_map = OrderedDict({}) @@ -50,18 +50,18 @@ else: data_map["emails"] = ("E-mail address", "+") widest_label = 0 - for key, value in data_map.iteritems(): + for key, value in data_map.items(): if len(value[0]) > widest_label: widest_label = len(value[0]) - for key, value in data_map.iteritems(): + for key, value in data_map.items(): if key in parsed and parsed[key] is not None: label = value[0] + (" " * (widest_label - len(value[0]))) + " :" if value[1] == 1: - print "%s %s" % (label, parsed[key][0]) + print("%s %s" % (label, parsed[key][0])) elif value[1] == "+": for item in parsed[key]: - print "%s %s" % (label, item) + print("%s %s" % (label, item)) if parsed["contacts"] is not None: # This defines the contacts shown in the output @@ -91,17 +91,17 @@ else: if parsed["contacts"][contact] is not None: contact_data = parsed["contacts"][contact] - print "\n" + contacts_map[contact] + print("\n" + contacts_map[contact]) - for key, value in data_map.iteritems(): + for key, value in data_map.items(): if len(value) > widest_label: widest_label = len(value) - for key, value in data_map.iteritems(): + for key, value in data_map.items(): if key in contact_data and contact_data[key] is not None: label = " " + value + (" " * (widest_label - len(value))) + " :" - actual_data = str(contact_data[key]) + actual_data = contact_data[key] if "\n" in actual_data: # Indent multi-line values properly lines = actual_data.split("\n") actual_data = "\n".join([lines[0]] + [(" " * (widest_label + 7)) + line for line in lines[1:]]) - print "%s %s" % (label, actual_data) + print("%s %s" % (label, actual_data)) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 0f22b8d..ec54014 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -1,4 +1,4 @@ -import socket, re +import socket, re, sys from codecs import encode, decode from . import shared @@ -12,13 +12,16 @@ def get_whois_raw(domain, server="", previous=[], rfc3490=True, never_cut=False, } if rfc3490: - domain = encode( domain if type(domain) is unicode else decode(domain, "utf8"), "idna" ) + if sys.version_info < (3, 0): + domain = encode( domain if type(domain) is unicode else decode(domain, "utf8"), "idna" ) + else: + domain = encode(domain, "idna").decode("ascii") if len(previous) == 0 and server == "": # Root query server_list = [] # Otherwise it retains the list on subsequent queries, for some reason. is_exception = False - for exception, exc_serv in exceptions.iteritems(): + for exception, exc_serv in exceptions.items(): if domain.endswith(exception): is_exception = True target_server = exc_serv @@ -78,11 +81,11 @@ def get_root_server(domain): def whois_request(domain, server, port=43): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((server, port)) - sock.send("%s\r\n" % domain) - buff = "" + sock.send(("%s\r\n" % domain).encode("utf-8")) + buff = b"" while True: data = sock.recv(1024) if len(data) == 0: break buff += data - return buff + return buff.decode("utf-8") diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 3fb91ec..661de61 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -542,7 +542,7 @@ def normalize_data(data, normalized): if contact is not None: for key in ("email",): if key in contact and contact[key] is not None and (normalized == True or key in normalized): - if isinstance(contact[key], str): + if is_string(contact[key]): contact[key] = contact[key].lower() else: contact[key] = [item.lower() for item in contact[key]]