From 0676302ae325e4d430e4ed318d49213f8b99952a Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Fri, 21 Sep 2012 04:50:24 +0200 Subject: [PATCH] Improve error handling, add some extra heuristic rules, fix a rule, and remove some placeholders --- pythonwhois/__init__.py | 114 ++++++++++++++++++++++++--------------- pythonwhois/__init__.pyc | Bin 5284 -> 5654 bytes test.py | 9 ++-- 3 files changed, 75 insertions(+), 48 deletions(-) diff --git a/pythonwhois/__init__.py b/pythonwhois/__init__.py index 04b37d4..1bf2de6 100644 --- a/pythonwhois/__init__.py +++ b/pythonwhois/__init__.py @@ -18,13 +18,16 @@ grammar = { 'creation_date': ['Created on:\s?(?P.+)', 'Created on\s?[.]*:\s?(?P.+)\.', 'Date Registered\s?[.]*:\s?(?P.+)', - 'Domain Created\s?[.]*:\s?(?P.+)'], + 'Domain Created\s?[.]*:\s?(?P.+)', + 'Domain registered\s?[.]*:\s?(?P.+)'], 'expiration_date': ['Expires on:\s?(?P.+)', 'Expires on\s?[.]*:\s?(?P.+)\.', 'Expiry Date\s?[.]*:\s?(?P.+)', - 'Domain Currently Expires\s?[.]*:\s?(?P.+)'], + 'Domain Currently Expires\s?[.]*:\s?(?P.+)', + 'Record will expire on\s?[.]*:\s?(?P.+)'], 'registrar': ['Registered through:\s?(?P.+)', - 'Registrar Name:\s?(?P.+)'], + 'Registrar Name:\s?(?P.+)', + 'Record maintained by:\s?(?P.+)'], 'whois_server': ['Registrar Whois:\s?(?P.+)'], 'name_servers': ['(?Pd?ns[0-9]+\.[a-z0-9-]+\.[a-z0-9]+)', '(?P[a-z0-9-]+\.d?ns[0-9]*\.[a-z0-9-]+\.[a-z0-9]+)'], @@ -34,9 +37,7 @@ grammar = { '(?P[0-9]{1,2})[./ -](?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P[0-9]{4}|[0-9]{2})' '(\s+(?P[0-9]{1,2})[:.](?P[0-9]{1,2})[:.](?P[0-9]{1,2}))?', '(?P[0-9]{4})[./-](?P[0-9]{1,2})[./-](?P[0-9]{1,2})', - '(?P[0-9]{1,2})(?P[0-9]{1,2})(?P[0-9]{4}|[0-9]{2})', - '(?P)(?P)(?P)', - '(?P)(?P)(?P)' + '(?P[0-9]{1,2})[./ -](?P[0-9]{1,2})[./ -](?P[0-9]{4}|[0-9]{2})' ), "_months": { 'jan': 1, @@ -94,10 +95,12 @@ def whois(domain): result = re.search(rule_regex, line, re.IGNORECASE) if result is not None: - try: - data[rule_key].append(result.group("val").strip()) - except KeyError, e: - data[rule_key] = [result.group("val").strip()] + val = result.group("val").strip() + if val != "": + try: + data[rule_key].append(val) + except KeyError, e: + data[rule_key] = [val] # Run through fallback detection to gather missing info for rule_key, rule_regexes in grammar['_fallback'].iteritems(): @@ -107,10 +110,12 @@ def whois(domain): result = re.search(regex, line, re.IGNORECASE) if result is not None: - try: - data[rule_key].append(result.group("val").strip()) - except KeyError, e: - data[rule_key] = [result.group("val").strip()] + val = result.group("val").strip() + if val != "": + try: + data[rule_key].append(val) + except KeyError, e: + data[rule_key] = [val] # Fill all missing values with None if data.has_key(rule_key) == False: @@ -133,38 +138,59 @@ def parse_dates(dates): result = re.match(rule, date) if result is not None: - # These are always numeric. - year = int(result.group("year")) - day = int(result.group("day")) - - # This will require some more guesswork - some WHOIS servers present the name of the month try: - month = int(result.group("month")) - except ValueError, e: - # Apparently not a number. Look up the corresponding number. - try: - month = grammar['_months'][result.group("month").lower()] - except KeyError, e: - # Unknown month name, default to 0 - month = 0 + # These are always numeric. If they fail, there is no valid date present. + year = int(result.group("year")) + day = int(result.group("day")) - try: - hour = int(result.group("hour")) - except IndexError, e: + # This will require some more guesswork - some WHOIS servers present the name of the month + try: + month = int(result.group("month")) + except ValueError, e: + # Apparently not a number. Look up the corresponding number. + try: + month = grammar['_months'][result.group("month").lower()] + except KeyError, e: + # Unknown month name, default to 0 + month = 0 + + try: + hour = int(result.group("hour")) + except IndexError, e: + hour = 0 + + try: + minute = int(result.group("minute")) + except IndexError, e: + minute = 0 + + try: + second = int(result.group("second")) + except IndexError, e: + second = 0 + + break + except ValueError, e: + # Something went horribly wrong, maybe there is no valid date present? + year = 0 + month = 0 + day = 0 hour = 0 - - try: - minute = int(result.group("minute")) - except IndexError, e: minute = 0 - - try: - second = int(result.group("second")) - except IndexError, e: second = 0 - - break - - parsed_dates.append(datetime.datetime(year, month, day, hour, minute, second)) - - return parsed_dates + print e.message + try: + if year > 0: + try: + parsed_dates.append(datetime.datetime(year, month, day, hour, minute, second)) + except ValueError, e: + # We might have gotten the day and month the wrong way around, let's try it the other way around + # If you're not using an ISO-standard date format, you're an evil registrar! + parsed_dates.append(datetime.datetime(year, day, month, hour, minute, second)) + except UnboundLocalError, e: + pass + + if len(parsed_dates) > 0: + return parsed_dates + else: + return None diff --git a/pythonwhois/__init__.pyc b/pythonwhois/__init__.pyc index a5b3f351de63aeac3ec413dcd99929a5626e76ae..51a6e6035734bf6c715cd741ac2c74db6c1be777 100644 GIT binary patch delta 1794 zcmah}OKe+36g@M~b`pQ%q(Kd#vD=h1{nAfJN}GNl32kYbk3+GW(xNnD1v`nI_x!3l zk}V@z7IafpBe6gN34st|(Jq1z0u{j$Ar>rPK^L$=LM%WCR^7mv_tFod684*W?%dD3 zd*_{*_qM*Vqs#g;7JlWsvy*dJ`!x}da#X&(-qmQRPRs|PVi-`5LUlns2DJgI+p{KU zO+lrhHe%j_3Ua;)^8%`gS`XA_%uA>wCy#sPUTRx7W}voXps*fl2h>g)2)hVRK<)Nn z4`DAAC`hkA-|0P3I*Lxe+6hkZCgI12T&56=*Wp+kKS?vt|Cd7pPJK3v$ZHw!vQ1`0c^p}Vyn9$5I%u)GN zS6Xgv{0OxWiV(9N9dNZnTp(aq2zUi7C4)1EzA}Ge8Lbyy%!g%UY)S~9|E?978 z#7YNh0n}R1S!^{9#)+(*6#+Mig)p3HA?h&*dPoXCa)|DnJtOR|!6$2ff^5|iE;-Z8 zRn}hr2VRt@544~jH)+K-^A0ZV2CyLER7CZI2OD_PYv4L+adz{%PDgqhH^l!GKPVqw ze80R~{BHS>;`?RJ9+dVC!6$3KBeD-EPf69ly>^wfD}s-SRNN^AEVRJ6Cf#-uV@&MK z*M>@7o(kU!`}l4cO>1Fctc6YYMfgR>xp%-znHEYnNO9@qazP)Crh+af(C4DJx{Z0> zDPLUn0xGUkSaw6S%`PwJOG@vLZ4(JS7kf3B*=ACHj^#t+R4QY7GM?(`5K&=?4v`Qk zYXh-e#Kd|TrFPG*uf|iGUCFnQBLdY^vSyeyD(e!}n}U^)^_?T<-qqj4H(4tOg#JA~ zE`HF56Mdbx5pbJ5x_l2oCJGX+-UR!$ZX`@yA13<6i2gCr-&i4?*8CBa1agyE`%987 zK%s&+$f49 zeGs*#hT$BESPO&Xy6hbblHed2<5w_9#;`tfjvh>8QqzC(n3m($&e}1yAM+ZR^3!R_ zab#U`Ul`W2epA$9L~=e&{&47QYo@~-?95Ih_876(h#mU#)@>7qsJdZ_6D5_ejyg`+ zam~fPl&@4~=ku8%<2QGLnsxJTVJXi83!E-pC|@k8vGUyPqMu{Yx|lENSaRRylhiYY z7MTenCXE;~V#93O8`+_1NL*o9wo$NA9 z+A0#(ZnCUdwuyl3vsUjU*ZuoKOQj5J{ciUe?TiuL4w#!Xb3}jEJ}0i|)ViK-ON6eQHa+L_wlX(m%m>^O_i8^_5oX-1jn2bTQ4l_xsL0@5_DX zoO|v&W4BXr$6q1;oo|*3SJ3;pnUC|_`0A~AyS?Is2q7zC5W6Ab5PP6p5q9l`7{ZDM z;o`+GRtUnw);@^+mb;%9BbNIBTM3?Jh!nD`yC6m(((GUwV>$$J*rvysG7#f7O`x3> zlMqw9WO{-r3vq<$Nr-8ijxs$3k+bP(raZ(kn~pQhK%B7Yq)oF-a}cL&dWPvV#2K5; zGM$4sZ_@>23u2xFc0*i*aIm*vXS-k*rDzvo5klp931X1xS!6FeAfCgkz-2B~$ue9J zgvI3Z-9KqblTGLQ@|5dz_fywT^vFprcJf#uCn}ryrOct}<(J3DCR!=^i6<(5cE?9H zhVB!jlyd!Q-FPW)nVZ#0Bj5ei^EZXVEc5bI`)%Bnzxh7ie;1}3wE#N6?D8$NylCEp zNvLVI>vj3L|8RRV0n-W7hgdBPGk{tI!i^5g`jm_fD(RQVY4l^wk4Ay06$JfX$Q@ws zlVf#R{~A0>`e!IL|AAq3M{N7Uf8YfTEgxDTm>DN(1M2hqA;XC^4UKhbet3WAZ(E_i z0W-vTbmV)1$J$%;Kw+n~?XVAOJBOXt%K8@ZDCzG~iIe$nU}a6fO0wcZPA_ZvI`L3S z;YEz!t@+UCXvUxtDHrEtTe-URN5NI@$v)bav8yNJO>Nm?(l57yN8~TTFZ}94R<2#E z)`h$kipnoTDGJNqLpP)zuDUO%!=9BthKJ)0N>GH-j=dD6K?>0>El53|9EuDl?hvp7 zv@t7JfRXKZ$PSymz=^0ml3{UE=OBxCpBG`!ZKe+T03A?}No2l_qv zaNj5e652Q=_)mET6^ GX!2h_k2fL! diff --git a/test.py b/test.py index 7df347e..8e1035a 100644 --- a/test.py +++ b/test.py @@ -2,7 +2,8 @@ import sys, pythonwhois result = pythonwhois.whois(sys.argv[1]) -print "Creation date: ", -print result['creation_date'] -print "Expiration date: ", -print result['expiration_date'] +print result +#print "Creation date: ", +#print result['creation_date'] +#print "Expiration date: ", +#print result['expiration_date']