Implement BinSearch spider
parent
696d4f1c6e
commit
0917f06de7
@ -1,7 +1,81 @@
|
||||
from shared import NotFoundException
|
||||
from shared import NotFoundException, ModifiedSession, download_file
|
||||
import requests, re, HTMLParser
|
||||
|
||||
class BinsearchSpider(object):
    """Search binsearch.info and collect the posts whose title contains a name.

    After ``find`` has run, the session used for the search is kept on
    ``self.session``; ``BinsearchResult.download`` reuses it through the
    spider reference each result carries.
    """

    # The result page is matched with regular expressions (the original
    # author notes the HTML is deliberately malformed to deter scrapers).
    # Patterns are raw strings so that ``\.`` and ``\/`` reach the regex
    # engine untouched instead of relying on invalid string escapes, and
    # they are compiled once here rather than on every loop iteration.
    _ROW_RE = re.compile(r'<tr[^>]+>(.*?)<a href="browse\.php', re.DOTALL)
    _TITLE_RE = re.compile(r'<span[^>]*class="s"[^>]*>(.*?)<\/span>', re.DOTALL)
    _CHECKBOX_RE = re.compile(r'<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>')
    _TAG_RE = re.compile(r"<[^>]+>")

    @staticmethod
    def _search_params(name):
        """Return the fixed query-string parameters for a search on *name*."""
        return {
            "q": name,
            "m": "",
            "adv_age": "600",
            "max": "100",
            "adv_g": "",
            "adv_sort": "date",
            "minsize": "100",
            "maxsize": "",
            "adv_col": "on",
            "adv_nfo": "on",
            "font": "",
            "postdate": "",
        }

    def find(self, name):
        """Return a list of ``BinsearchResult`` objects matching *name*.

        A result row is kept when it is not marked as password protected,
        has a title span, the title contains *name* (case-insensitively)
        and a checkbox input with a numeric ``name`` attribute is present;
        that numeric value becomes the result's id.

        Raises:
            NotFoundException: if no row on the page qualifies.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession()

        # NOTE(review): verify=False is handed to the session as-is; with a
        # requests-style session this turns off TLS certificate
        # verification -- confirm it is still required.
        response = self.session.get(
            "https://binsearch.info/index.php",
            params=self._search_params(name),
            verify=False,
        )

        wanted = name.lower()
        search_results = []

        for row in self._ROW_RE.findall(response.text):
            if 'requires password' in row:
                # Password protected
                continue

            title_match = self._TITLE_RE.search(row)
            if title_match is None:
                continue

            # Drop any markup nested inside the title span, then decode
            # HTML entities.
            title = parser.unescape(self._TAG_RE.sub("", title_match.group(1)))
            if wanted not in title.lower():
                continue

            id_match = self._CHECKBOX_RE.search(row)
            if id_match is None:
                continue

            search_results.append(
                BinsearchResult(name, title, id_match.group(1), self)
            )

        if not search_results:
            raise NotFoundException("No results were found.")

        return search_results
|
||||
|
||||
class BinsearchResult(object):
    """One search hit produced by a spider.

    Attributes are plain values stored by the constructor:
    ``name`` (the search term), ``title`` (the matched post title),
    ``id_`` (the identifier submitted when requesting the NZB) and
    ``spider`` (the object whose ``session`` is used for the download).
    """

    def __init__(self, name, title, id_, spider):
        """Store the search term, title, post id and originating spider."""
        self.name = name
        self.title = title
        self.id_ = id_
        self.spider = spider

    def show(self):
        """Print a one-line ``title -> id (name)`` summary to stdout."""
        # A single parenthesised expression prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s -> %s (%s)" % (self.title, self.id_, self.name))

    def download(self, target_path):
        """Request the NZB for this result and hand it to ``download_file``.

        The POST goes through the spider's existing session, so the spider
        must already have a ``session`` attribute. The response object and
        *target_path* are passed straight to ``download_file``.
        """
        # The form body selects this post by using its id as the field name.
        form_data = {"action": "nzb", self.id_: "on"}

        query = {
            "q": self.name,
            "m": "",
            "adv_age": "600",
            "max": "100",
            "adv_g": "",
            "adv_sort": "date",
            "minsize": "100",
            "maxsize": "",
            "adv_col": "on",
            "adv_nfo": "on",
            "font": "",
            "postdate": "",
        }

        response = self.spider.session.post(
            "https://www.binsearch.info/fcgi/nzb.fcgi",
            params=query,
            data=form_data,
        )

        download_file(response, target_path)
|
||||
|
Loading…
Reference in New Issue