from shared import NotFoundException, ModifiedSession, download_file
import requests, re, HTMLParser
class BinsearchSpider(object):
    """Scrape binsearch.info search results for posts whose title contains a name.

    ``find`` creates a fresh HTTP session and keeps it on ``self.session``;
    the ``BinsearchResult`` objects it returns hold a reference to this
    spider and reuse that session when downloading.
    """

    # The site serves deliberately malformed HTML, so result rows are carved
    # out with regular expressions rather than fed to a real HTML parser.
    _ROW_RE = re.compile(r'<tr[^>]+>(.*?)<a href="browse\.php', re.DOTALL)
    _TITLE_RE = re.compile(r'<span[^>]*class="s"[^>]*>(.*?)<\/span>', re.DOTALL)
    _TAG_RE = re.compile(r"<[^>]+>")
    _CHECKBOX_RE = re.compile(r'<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>')

    def find(self, name):
        """Search binsearch.info for ``name`` and return the matching posts.

        A result row is kept only when all of the following hold:

        * it does not contain the text ``requires password``;
        * it has a title span whose text (tags stripped, HTML entities
          decoded) contains ``name``, compared case-insensitively;
        * it has a checkbox input with a numeric ``name`` attribute, which
          is used as the result's id.

        :param name: text to search for; also used to filter result titles.
        :returns: non-empty list of ``BinsearchResult`` objects.
        :raises NotFoundException: if no row passes the filters above.
        """
        parser = HTMLParser.HTMLParser()
        # Kept on the instance because BinsearchResult.download() posts
        # through ``spider.session``.
        self.session = ModifiedSession()

        # NOTE(review): verify=False disables TLS certificate verification for
        # this request -- confirm it is still needed before relying on it.
        response = self.session.get("https://binsearch.info/index.php", params={
            "q": name,
            "m": "",
            "adv_age": "600",
            "max": "100",
            "adv_g": "",
            "adv_sort": "date",
            "minsize": "100",
            "maxsize": "",
            "adv_col": "on",
            "adv_nfo": "on",
            "font": "",
            "postdate": "",
        }, verify=False)

        wanted = name.lower()
        search_results = []

        for row in self._ROW_RE.findall(response.text):
            if 'requires password' in row:
                # Password-protected post: skip it.
                continue

            title_match = self._TITLE_RE.search(row)
            if title_match is None:
                # Row has no title span, so there is nothing to compare
                # against (and calling .group() on None would raise).
                continue

            title = parser.unescape(self._TAG_RE.sub("", title_match.group(1)))
            if wanted not in title.lower():
                continue

            id_match = self._CHECKBOX_RE.search(row)
            if id_match is not None:
                search_results.append(
                    BinsearchResult(name, title, id_match.group(1), self))

        if not search_results:
            raise NotFoundException("No results were found.")

        return search_results
class BinsearchResult(object):
    """A single binsearch.info hit produced by a spider's search.

    Holds the search term, the post title, the post id and the spider whose
    HTTP session is reused for the download request.
    """

    def __init__(self, name, title, id_, spider):
        """Record the search term, title, post id and originating spider."""
        self.spider = spider
        self.id_ = id_
        self.title = title
        self.name = name

    def show(self):
        """Print the result as ``<title> -> <id> (<search term>)``."""
        summary = "%s -> %s (%s)" % (self.title, self.id_, self.name)
        # A single parenthesised argument prints identically whether ``print``
        # is the Python 2 statement or the Python 3 function.
        print(summary)

    def download(self, target_path):
        """Request the NZB for this result and pass it to ``download_file``.

        Posts to the site's ``nzb.fcgi`` endpoint through the spider's
        session, sending this result's id as a ticked checkbox together with
        the search parameters as the query string, then hands the response
        and ``target_path`` to ``download_file``.

        :param target_path: forwarded unchanged to ``download_file``.
        """
        # Form body: the requested action plus this post's id marked "on".
        form_fields = {"action": "nzb", self.id_: "on"}
        query_string = {
            "q": self.name,
            "m": "",
            "adv_age": "600",
            "max": "100",
            "adv_g": "",
            "adv_sort": "date",
            "minsize": "100",
            "maxsize": "",
            "adv_col": "on",
            "adv_nfo": "on",
            "font": "",
            "postdate": "",
        }
        session = self.spider.session
        response = session.post(
            "https://www.binsearch.info/fcgi/nzb.fcgi",
            params=query_string,
            data=form_fields,
        )
        download_file(response, target_path)