#!/usr/bin/python
"""Minimal WHOIS client: run the system ``whois`` tool and scrape its output.

Reconstructed from the patch "Initial commit" (a4f00914c41eb4f61d5df311b1c9e1f327af4101,
Sven Slootweg, 2012-09-21), file ``pythonwhois/__init__.py``.  The compiled
``__init__.pyc`` that the patch also carried is a build artifact and is not
reproduced here.

Usage, as exercised by the commit's accompanying ``test.py``::

    import sys, pythonwhois

    result = pythonwhois.whois(sys.argv[1])
    print("Creation date: %s" % result['creation_date'])
    print("Expiration date: %s" % result['expiration_date'])

The code is written to run unchanged on Python 2.6+ and Python 3.
"""

import datetime
import re
import subprocess

# Rule table driving the parser.
#
# * Keys starting with "_" are internal tables.
# * Every other key is a regex matched against the queried domain name; its
#   value is the ruleset used for that registry instead of "_default".
# * Every field regex exposes the captured value as the named group "val"
#   (that is the group whois() reads).
#
# NOTE(review): the archived copy of this file had all "(?P<name>" group names
# stripped.  "val" and the date group names below were restored from the
# result.group(...) calls in whois()/parse_dates(); verify against upstream.
grammar = {
    "_default": {
        'domain_name': r'Domain Name:\s?(?P<val>.+)',
        'registrar': r'Registrar:\s?(?P<val>.+)',
        'whois_server': r'Whois Server:\s?(?P<val>.+)',
        'referral_url': r'Referral URL:\s?(?P<val>.+)',
        'updated_date': r'Updated Date:\s?(?P<val>.+)',
        'creation_date': r'Creation Date:\s?(?P<val>.+)',
        'expiration_date': r'Expiration Date:\s?(?P<val>.+)',
        'name_servers': r'Name Server:\s?(?P<val>.+)',
        'status': r'Status:\s?(?P<val>.+)',
    },
    # Looser patterns tried only for fields the primary ruleset did not find.
    "_fallback": {
        'creation_date': [r'Created on:\s?(?P<val>.+)',
                          r'Created on\s?[.]*:\s?(?P<val>.+)\.',
                          r'Date Registered\s?[.]*:\s?(?P<val>.+)',
                          r'Domain Created\s?[.]*:\s?(?P<val>.+)'],
        'expiration_date': [r'Expires on:\s?(?P<val>.+)',
                            r'Expires on\s?[.]*:\s?(?P<val>.+)\.',
                            r'Expiry Date\s?[.]*:\s?(?P<val>.+)',
                            r'Domain Currently Expires\s?[.]*:\s?(?P<val>.+)'],
        'registrar': [r'Registered through:\s?(?P<val>.+)',
                      r'Registrar Name:\s?(?P<val>.+)'],
        'whois_server': [r'Registrar Whois:\s?(?P<val>.+)'],
        'name_servers': [r'(?P<val>d?ns[0-9]+\.[a-z0-9-]+\.[a-z0-9]+)',
                         r'(?P<val>[a-z0-9-]+\.d?ns[0-9]*\.[a-z0-9-]+\.[a-z0-9]+)'],
        'emails': [r'(?P<val>[\w.-]+@[\w.-]+\.[\w]{2,4})'],
    },
    # Date formats, tried in order by parse_dates().
    "_dateformats": (
        # 21-Sep-2012, optionally followed by " 04:23:06".
        r'(?P<day>[0-9]{1,2})[./ -](?P<month>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P<year>[0-9]{4}|[0-9]{2})'
        r'(\s+(?P<hour>[0-9]{1,2})[:.](?P<minute>[0-9]{1,2})[:.](?P<second>[0-9]{1,2}))?',
        # 2012-09-21
        r'(?P<year>[0-9]{4})[./-](?P<month>[0-9]{1,2})[./-](?P<day>[0-9]{1,2})',
        # Separator-less numeric date.
        # NOTE(review): day-before-month order is assumed here - TODO confirm.
        r'(?P<day>[0-9]{1,2})(?P<month>[0-9]{1,2})(?P<year>[0-9]{4}|[0-9]{2})',
        # Unfinished placeholder formats kept from the original commit.  They
        # match the empty string, so parse_dates() rejects them as unusable.
        r'(?P<year>)(?P<month>)(?P<day>)',
        r'(?P<year>)(?P<month>)(?P<day>)',
    ),
    # Lower-cased month name -> month number.
    "_months": {
        'jan': 1,
        'january': 1,
        'feb': 2,
        'february': 2,
        'mar': 3,
        'march': 3,
        'apr': 4,
        'april': 4,
        'may': 5,
        'jun': 6,
        'june': 6,
        'jul': 7,
        'july': 7,
        'aug': 8,
        'august': 8,
        'sep': 9,
        'sept': 9,
        'september': 9,
        'oct': 10,
        'october': 10,
        'nov': 11,
        'november': 11,
        'dec': 12,
        'december': 12,
    },
    r".*\.ru$": {
        'domain_name': r'domain:\s*(?P<val>.+)',
        'registrar': r'registrar:\s*(?P<val>.+)',
        'creation_date': r'created:\s*(?P<val>.+)',
        'expiration_date': r'paid-till:\s*(?P<val>.+)',
        'name_servers': r'nserver:\s*(?P<val>.+)',
        'status': r'state:\s*(?P<val>.+)',
    },
}


def whois(domain):
    """Look up *domain* with the system ``whois`` tool and parse the reply.

    Args:
        domain: Domain name to query; it is passed to ``whois`` as a single
            argument (no shell is involved).

    Returns:
        A dict keyed by field name.  Every key of the selected ruleset and of
        the ``_fallback`` table is present.  A value is a list of stripped
        strings, one per matching output line, or ``None`` when nothing
        matched.  ``creation_date`` and ``expiration_date`` hold lists of
        ``datetime.datetime`` objects instead, or ``None`` when no captured
        value could be parsed.

    Raises:
        OSError: propagated from ``subprocess.Popen`` when the ``whois``
            executable cannot be started.
    """
    ruleset = _select_ruleset(domain)
    return _parse_whois_output(_run_whois(domain), ruleset)


def _select_ruleset(domain):
    """Return the registry-specific ruleset for *domain*, else ``_default``."""
    ruleset = grammar['_default']

    for regex, rules in grammar.items():
        # Domain names are case-insensitive, so "EXAMPLE.RU" must select the
        # same ruleset as "example.ru".
        if not regex.startswith("_") and re.match(regex, domain, re.IGNORECASE):
            ruleset = rules

    return ruleset


def _run_whois(domain):
    """Run ``whois <domain>`` and return its standard output as text."""
    process = subprocess.Popen(["whois", domain],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, _ = process.communicate()

    # Python 3 hands back bytes; decode leniently because registries do not
    # agree on an encoding.  On Python 2 the output is already ``str``.
    if not isinstance(out, str):
        out = out.decode("utf-8", "replace")

    return out


def _collect_matches(lines, patterns):
    """Return the stripped ``val`` group of every pattern hit, line by line."""
    values = []

    for line in lines:
        for pattern in patterns:
            result = re.search(pattern, line, re.IGNORECASE)
            if result is not None:
                values.append(result.group("val").strip())

    return values


def _parse_whois_output(out, ruleset):
    """Extract the fields described by *ruleset* (plus fallbacks) from *out*."""
    lines = out.splitlines()
    data = {}

    for rule_key, rule_regex in ruleset.items():
        values = _collect_matches(lines, [rule_regex])
        if values:
            data[rule_key] = values

    # Run through fallback detection to gather missing info.
    for rule_key, rule_regexes in grammar['_fallback'].items():
        if rule_key not in data:
            values = _collect_matches(lines, rule_regexes)
            data[rule_key] = values if values else None

    # Fill all remaining missing values with None so callers can index any
    # known field without risking a KeyError.
    for rule_key in ruleset:
        data.setdefault(rule_key, None)

    # Parse dates; an empty result is reported as None like any missing field.
    for date_key in ('expiration_date', 'creation_date'):
        if data.get(date_key) is not None:
            data[date_key] = parse_dates(data[date_key]) or None

    return data


def parse_dates(dates):
    """Convert WHOIS date strings into ``datetime.datetime`` objects.

    Each string is tried against ``grammar['_dateformats']`` in order; the
    first format that yields a valid calendar date wins.  Matching is
    case-insensitive, a missing time of day defaults to 00:00:00, and
    two-digit years are expanded (69-99 -> 1969-1999, 00-68 -> 2000-2068).

    Args:
        dates: Iterable of date strings.

    Returns:
        A list of ``datetime.datetime`` objects in input order.  Strings that
        no format can turn into a valid date are skipped, so the result may be
        shorter than the input.
    """
    parsed_dates = []

    for date in dates:
        parsed = _parse_date(date)
        if parsed is not None:
            parsed_dates.append(parsed)

    return parsed_dates


def _parse_date(date):
    """Return *date* as a ``datetime``, or ``None`` if no format applies."""
    for rule in grammar['_dateformats']:
        result = re.match(rule, date, re.IGNORECASE)
        if result is None:
            continue

        fields = result.groupdict()

        try:
            year = int(fields['year'])
            if len(fields['year']) == 2:
                year = _expand_two_digit_year(year)

            # hour/minute/second are absent from most formats and optional in
            # the rest; either way they default to 0.
            return datetime.datetime(year,
                                     _month_number(fields['month']),
                                     int(fields['day']),
                                     int(fields.get('hour') or 0),
                                     int(fields.get('minute') or 0),
                                     int(fields.get('second') or 0))
        except (KeyError, TypeError, ValueError):
            # Empty placeholder groups, unknown month names or out-of-range
            # values: this format does not really describe the string, so
            # give the next one a chance.
            continue

    return None


def _month_number(text):
    """Return the month number for a numeric or named month (KeyError if unknown)."""
    try:
        return int(text)
    except ValueError:
        # Apparently not a number. Look up the corresponding number.
        return grammar['_months'][text.lower()]


def _expand_two_digit_year(year):
    """Expand a two-digit year using the POSIX ``%y`` pivot (69 -> 1969, 68 -> 2068)."""
    return year + (1900 if year >= 69 else 2000)