# Provenance: reconstructed from the patch "Implement BinSearch spider"
# (commit 0917f06de7d39515d691159b71235cbad49ef2d8, Sven Slootweg,
# Sun, 30 Jun 2013 15:36:52 +0200), which targets sources/binsearch.py.
"""Spider that searches binsearch.info and downloads the NZB of a result.

This module targets Python 2: it relies on the ``HTMLParser`` module.
"""

import HTMLParser
import re

import requests

from shared import NotFoundException, ModifiedSession, download_file


_SEARCH_URL = "https://binsearch.info/index.php"
_DOWNLOAD_URL = "https://www.binsearch.info/fcgi/nzb.fcgi"

# NOTE(review): the two patterns below were truncated when this file was
# extracted (only "]+>(.*?)" and "]*>(.*?)<\/span>" survived). They are
# reconstructed from that residue and must be confirmed against upstream.
# The site's HTML is deliberately malformed, so rows are delimited by the
# opening <tr ...> tag and a later link rather than by a closing </tr>.
_ROW_RE = re.compile(r'<tr[^>]+>(.*?)<a href="browse\.php', re.DOTALL)
_TITLE_RE = re.compile(r'<span[^>]*>(.*?)</span>', re.DOTALL)

# Strips any remaining markup from the captured title.
_TAG_RE = re.compile(r"<[^>]+>")

# The numeric `name` attribute of a row's checkbox is used as the post ID.
_CHECKBOX_RE = re.compile(
    r'<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>'
)


def _search_params(query):
    """Return the query-string parameters sent with every request for *query*.

    The same parameter set accompanies both the search request and the NZB
    download request; a fresh dict is returned on each call.
    """
    return {
        "q": query,
        "m": "",
        "adv_age": "600",
        "max": "100",
        "adv_g": "",
        "adv_sort": "date",
        "minsize": "100",
        "maxsize": "",
        "adv_col": "on",
        "adv_nfo": "on",
        "font": "",
        "postdate": "",
    }


class BinsearchSpider(object):
    """Searches binsearch.info for posts whose title contains a given name."""

    def find(self, name):
        """Search for *name* and return the matching results.

        A new ``ModifiedSession`` is created and stored on ``self.session``;
        the returned results reuse it when downloading.

        Args:
            name: Text to search for. Only rows whose title contains it
                (case-insensitively) are returned.

        Returns:
            A non-empty list of ``BinsearchResult`` objects.

        Raises:
            NotFoundException: If no row matched.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession()

        # NOTE(review): TLS certificate verification is disabled here, as in
        # the original code; confirm whether that is still required.
        response = self.session.get(
            _SEARCH_URL, params=_search_params(name), verify=False
        )

        wanted = name.lower()
        search_results = []

        # NOTE(review): the subject of the row search was lost in extraction;
        # the response body (`response.text`) is assumed.
        for row in _ROW_RE.findall(response.text):
            title_match = _TITLE_RE.search(row)

            if title_match is None:
                continue

            # Drop nested markup, then decode HTML entities in the title.
            title = parser.unescape(_TAG_RE.sub("", title_match.group(1)))

            if wanted not in title.lower():
                continue

            id_match = _CHECKBOX_RE.search(row)

            # Rows without a checkbox carry no usable ID and are skipped.
            if id_match is not None:
                search_results.append(
                    BinsearchResult(name, title, id_match.group(1), self)
                )

        if not search_results:
            raise NotFoundException("No results were found.")

        return search_results


class BinsearchResult(object):
    """A single search hit produced by ``BinsearchSpider.find``."""

    def __init__(self, name, title, id_, spider):
        """Store the hit's data.

        Args:
            name: The search text that produced this hit.
            title: The title extracted from the result row.
            id_: The digit string taken from the row's checkbox name.
            spider: The spider that found the hit; its ``session`` is used
                by ``download``.
        """
        self.name = name
        self.title = title
        self.id_ = id_
        self.spider = spider

    def show(self):
        """Print the title, ID and search text of this hit to stdout."""
        # A single parenthesised argument prints identically on Python 2
        # and also parses on Python 3.
        print("%s -> %s (%s)" % (self.title, self.id_, self.name))

    def download(self, target_path):
        """Request the NZB for this hit and pass the response on for saving.

        Args:
            target_path: Forwarded unchanged to ``download_file`` together
                with the HTTP response (presumably the destination path;
                verify against ``shared.download_file``).
        """
        # The form marks this hit's ID as ticked and asks for the NZB action.
        data_dict = {"action": "nzb"}
        data_dict[self.id_] = "on"

        response = self.spider.session.post(
            _DOWNLOAD_URL, params=_search_params(self.name), data=data_dict
        )

        download_file(response, target_path)