Implement BinSearch spider

11 years ago · 0917f06de7
parent 696d4f1c6e
commit 0917f06de7
1 changed files with 77 additions and 3 deletions
--- a/sources/binsearch.py
+++ b/sources/binsearch.py
@@ -1,7 +1,81 @@
-from shared import NotFoundException
+from shared import NotFoundException, ModifiedSession, download_file
+import requests, re, HTMLParser

 class BinsearchSpider(object):
-	pass
+	def find(self, name):
+		parser = HTMLParser.HTMLParser()
+		self.session = ModifiedSession()
+		
+		response = self.session.get("https://binsearch.info/index.php", params={
+			"q": name,
+			"m": "",
+			"adv_age": "600",
+			"max": "100",
+			"adv_g": "",
+			"adv_sort": "date",
+			"minsize": "100",
+			"maxsize": "",
+			"adv_col": "on",
+			"adv_nfo": "on",
+			"font": "",
+			"postdate": ""
+		}, verify=False)
+		
+		search_results = []
+		
+		# Nice try, corrupting your HTML to deter scrapers. Not going to stop me, though.
+		results = re.findall('<tr[^>]+>(.*?)<a href="browse\.php', response.text, re.DOTALL)
+		
+		for result in results:
+			if 'requires password' in result:
+				# Password protected
+				continue
+			
+			match = re.search('<span[^>]*class="s"[^>]*>(.*?)<\/span>', result, re.DOTALL)
+			
+			if match is None:
+				continue
+				
+			title = parser.unescape(re.sub("<[^>]+>", "", match.group(1)))
+			
+			if name.lower() in title.lower():
+				match = re.search('<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>', result)
+				
+				if match is not None:
+					search_results.append(BinsearchResult(name, title, match.group(1), self))
+		
+		if len(search_results) == 0:
+			raise NotFoundException("No results were found.")
+				
+		return search_results
 	
 class BinsearchResult(object):
-	pass
+	def __init__(self, name, title, id_, spider):
+		self.name = name
+		self.title = title
+		self.id_ = id_
+		self.spider = spider
+	
+	def show(self):
+		print "%s -> %s (%s)" % (self.title, self.id_, self.name)
+	
+	def download(self, target_path):
+		data_dict = {"action": "nzb"}
+		data_dict[self.id_] = "on"
+		
+		response = self.spider.session.post("https://www.binsearch.info/fcgi/nzb.fcgi", params={
+			"q": self.name,
+			"m": "",
+			"adv_age": "600",
+			"max": "100",
+			"adv_g": "",
+			"adv_sort": "date",
+			"minsize": "100",
+			"maxsize": "",
+			"adv_col": "on",
+			"adv_nfo": "on",
+			"font": "",
+			"postdate": ""
+		}, data=data_dict)
+		
+		download_file(response, target_path)