from shared import NotFoundException, ModifiedSession, download_file

import requests
import re
import HTMLParser

# Patterns used to scrape the nzbindex.com search result page.
# Compiled once at import time instead of on every find() call.
_ROW_PATTERN = re.compile(r"<tr[^>]+>(.*?)<\/tr>", re.DOTALL)
_LABEL_PATTERN = re.compile(r"<label[^>]+>(.*?)<\/label>", re.DOTALL)
_TAG_PATTERN = re.compile(r"<[^>]+>")
_NZB_URL_PATTERN = re.compile(r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb')


class NzbindexSpider(object):
    """Scrapes nzbindex.com search results for downloadable NZB files."""

    def find(self, name):
        """Search nzbindex.com for `name` and return the matching results.

        A fresh session is created and stored on `self.session` so that the
        returned results can reuse it when downloading.

        Args:
            name: Search query. Only results whose title contains this text
                (case-insensitively) are returned.

        Returns:
            A non-empty list of NzbindexResult objects.

        Raises:
            NotFoundException: If no result matched.
        """
        parser = HTMLParser.HTMLParser()

        self.session = ModifiedSession()
        # Submit the "I agree" form before searching.
        # NOTE(review): verify=False presumably disables TLS certificate
        # verification (as in requests); confirm this is still required.
        self.session.post(
            "https://nzbindex.com/agree/",
            data={"agree": "I agree"},
            verify=False,
        )

        response = self.session.get(
            "https://nzbindex.com/search/",
            params={
                "q": name,
                "age": "",
                "max": "50",
                "minage": "",
                "sort": "agedesc",
                "minsize": "100",
                "maxsize": "",
                "dq": "",
                "poster": "",
                "nfo": "",
                "hasnfo": "1",
                "complete": "1",
                "hidespam": "1",
                "more": "1",
            },
            verify=False,
        )

        wanted = name.lower()
        search_results = []

        for row in _ROW_PATTERN.findall(response.text):
            if 'class="threat"' in row:
                # Password protected or otherwise unsuitable for download
                continue

            label = _LABEL_PATTERN.search(row)
            if label is None:
                continue

            # Strip nested markup from the label, then decode HTML entities.
            title = parser.unescape(_TAG_PATTERN.sub("", label.group(1)))
            if wanted not in title.lower():
                continue

            link = _NZB_URL_PATTERN.search(row)
            if link is not None:
                # The result keeps a reference to this spider so that
                # download() can reuse the session created above.
                search_results.append(
                    NzbindexResult(title, link.group(0), self)
                )

        if not search_results:
            raise NotFoundException("No results were found.")

        return search_results


class NzbindexResult(object):
    """A single search hit: a title, its NZB download URL and its spider."""

    def __init__(self, title, url, spider):
        """Store the result data.

        Args:
            title: Title text of the result.
            url: URL of the .nzb file.
            spider: The spider that produced this result; its `session`
                attribute is used by download().
        """
        self.title = title
        self.url = url
        self.spider = spider

    def show(self):
        """Print the result as "<title> -> <url>"."""
        # Single-argument parenthesised form works on Python 2 and 3 alike.
        print("%s -> %s" % (self.title, self.url))

    def download(self, target_path):
        """Fetch the NZB via the spider's session and pass it to download_file.

        Args:
            target_path: Destination handed to download_file.
        """
        download_file(self.spider.session.get(self.url), target_path)