nzbspider/sources/binsearch.py

from shared import NotFoundException, ModifiedSession, download_file
import requests, re, HTMLParser

class BinsearchSpider(object):
	"""Scrapes the binsearch.info search page for posts matching a name.

	The HTTP session created by find() is kept on the instance as
	``self.session`` so that the returned BinsearchResult objects can reuse
	it when downloading.
	"""

	# The result page is picked apart with regular expressions instead of an
	# HTML parser: per the original author's note, the site's markup is
	# deliberately corrupted to deter scrapers.
	# Everything between a <tr ...> tag and the next browse.php link is
	# treated as one result row.
	_ROW_RE = re.compile(r'<tr[^>]+>(.*?)<a href="browse\.php', re.DOTALL)
	# The <span class="s"> element inside a row holds the post title.
	_TITLE_RE = re.compile(r'<span[^>]*class="s"[^>]*>(.*?)<\/span>', re.DOTALL)
	# Any tag; used to strip markup nested inside the title.
	_TAG_RE = re.compile(r"<[^>]+>")
	# The row's checkbox; its numeric name attribute is used as the post id.
	_CHECKBOX_RE = re.compile(r'<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>')

	def find(self, name):
		"""Search binsearch.info for ``name`` and return the matching posts.

		A row is kept only when it is not marked 'requires password', has a
		title span, the title contains ``name`` (case-insensitively), and a
		checkbox with a numeric name is present.

		:param name: text to search for; also used to filter the titles.
		:returns: non-empty list of BinsearchResult objects.
		:raises NotFoundException: when no row survives the filtering.
		"""
		parser = HTMLParser.HTMLParser()
		self.session = ModifiedSession()

		# NOTE(review): in requests, verify=False disables TLS certificate
		# verification for this request - confirm that this is still needed.
		response = self.session.get("https://binsearch.info/index.php", params={
			"q": name,
			"m": "",
			"adv_age": "600",
			"max": "100",
			"adv_g": "",
			"adv_sort": "date",
			"minsize": "100",
			"maxsize": "",
			"adv_col": "on",
			"adv_nfo": "on",
			"font": "",
			"postdate": ""
		}, verify=False)

		wanted = name.lower()
		search_results = []

		for row in self._ROW_RE.findall(response.text):
			if 'requires password' in row:
				# Password protected
				continue

			title_match = self._TITLE_RE.search(row)
			if title_match is None:
				continue

			# Drop nested tags first, then decode HTML entities.
			title = parser.unescape(self._TAG_RE.sub("", title_match.group(1)))
			if wanted not in title.lower():
				continue

			id_match = self._CHECKBOX_RE.search(row)
			if id_match is None:
				continue

			search_results.append(BinsearchResult(name, title, id_match.group(1), self))

		if not search_results:
			raise NotFoundException("No results were found.")

		return search_results
	
class BinsearchResult(object):
	"""A single post found by BinsearchSpider.find()."""

	def __init__(self, name, title, id_, spider):
		"""Store the details of one search hit.

		:param name: the search text that produced this hit.
		:param title: the post title as shown on the result page.
		:param id_: the post id (the result row's checkbox name).
		:param spider: the spider that found the hit; its ``session``
			attribute is used by download().
		"""
		self.name = name
		self.title = title
		self.id_ = id_
		self.spider = spider

	def show(self):
		"""Print the hit as ``<title> -> <id> (<search text>)``."""
		# A single parenthesised argument prints the same text under the
		# Python 2 print statement and also parses as a Python 3 call.
		print("%s -> %s (%s)" % (self.title, self.id_, self.name))

	def download(self, target_path):
		"""Request the NZB for this post and pass it to download_file().

		The request goes through ``self.spider.session``, which is only
		assigned inside BinsearchSpider.find().

		:param target_path: forwarded unchanged to download_file();
			presumably the path the NZB is written to - confirm in ``shared``.
		"""
		# The form marks the wanted post by sending its id as a field set to
		# "on", alongside the "nzb" action.
		data_dict = {"action": "nzb", self.id_: "on"}

		# The query string repeats the search parameters used by find().
		response = self.spider.session.post("https://www.binsearch.info/fcgi/nzb.fcgi", params={
			"q": self.name,
			"m": "",
			"adv_age": "600",
			"max": "100",
			"adv_g": "",
			"adv_sort": "date",
			"minsize": "100",
			"maxsize": "",
			"adv_col": "on",
			"adv_nfo": "on",
			"font": "",
			"postdate": ""
		}, data=data_dict)

		download_file(response, target_path)
Implement BinSearch spider 12 years ago			`from shared import NotFoundException, ModifiedSession, download_file`
			`import requests, re, HTMLParser`
Code so far 12 years ago
Initial commit 12 years ago			`class BinsearchSpider(object):`
Implement BinSearch spider 12 years ago			`def find(self, name):`
			`parser = HTMLParser.HTMLParser()`
			`self.session = ModifiedSession()`

			`response = self.session.get("https://binsearch.info/index.php", params={`
			`"q": name,`
			`"m": "",`
			`"adv_age": "600",`
			`"max": "100",`
			`"adv_g": "",`
			`"adv_sort": "date",`
			`"minsize": "100",`
			`"maxsize": "",`
			`"adv_col": "on",`
			`"adv_nfo": "on",`
			`"font": "",`
			`"postdate": ""`
			`}, verify=False)`

			`search_results = []`

			`# Nice try, corrupting your HTML to deter scrapers. Not going to stop me, though.`
			`results = re.findall('<tr[^>]+>(.*?)<a href="browse\.php', response.text, re.DOTALL)`

			`for result in results:`
			`if 'requires password' in result:`
			`# Password protected`
			`continue`

			`match = re.search('<span[^>]*class="s"[^>]*>(.*?)<\/span>', result, re.DOTALL)`

			`if match is None:`
			`continue`

			`title = parser.unescape(re.sub("<[^>]+>", "", match.group(1)))`

			`if name.lower() in title.lower():`
			`match = re.search('<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>', result)`

			`if match is not None:`
			`search_results.append(BinsearchResult(name, title, match.group(1), self))`

			`if len(search_results) == 0:`
			`raise NotFoundException("No results were found.")`

			`return search_results`
Code so far 12 years ago
			`class BinsearchResult(object):`
Implement BinSearch spider 12 years ago			`def __init__(self, name, title, id_, spider):`
			`self.name = name`
			`self.title = title`
			`self.id_ = id_`
			`self.spider = spider`

			`def show(self):`
			`print "%s -> %s (%s)" % (self.title, self.id_, self.name)`

			`def download(self, target_path):`
			`data_dict = {"action": "nzb"}`
			`data_dict[self.id_] = "on"`

			`response = self.spider.session.post("https://www.binsearch.info/fcgi/nzb.fcgi", params={`
			`"q": self.name,`
			`"m": "",`
			`"adv_age": "600",`
			`"max": "100",`
			`"adv_g": "",`
			`"adv_sort": "date",`
			`"minsize": "100",`
			`"maxsize": "",`
			`"adv_col": "on",`
			`"adv_nfo": "on",`
			`"font": "",`
			`"postdate": ""`
			`}, data=data_dict)`

			`download_file(response, target_path)`