diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..936a3c8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+config.json
diff --git a/main.py b/main.py
index 31f2657..8a986bb 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,117 @@
-import re, oursql, requests
+import re, oursql, requests, sys, json, shlex, argparse
 
 from sources.nzbindex import NzbindexSpider
 from sources.binsearch import BinsearchSpider
+from shared import NotFoundException
+
+# Exactly one release source is required: --config (database match) or --list (plain file).
+parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
+parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
+parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
+parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
+# --iplist gets its own dest; reusing dest="list" would overwrite the --list path.
+parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
+args = parser.parse_args()
+
+if args.config is not None:
+    mode = "config"
+elif args.list is not None:
+    mode = "list"
+else:
+    sys.stderr.write("You must specify either a configuration file or a release list.\n")
+    exit(1)
+
+if mode == "config":
+    # Database settings are always read from config.json in the working directory.
+    try:
+        with open("config.json", "r") as conf_file:
+            conf = json.load(conf_file)
+    except (IOError, ValueError):
+        # IOError: missing/unreadable file; ValueError: contents are not valid JSON.
+        sys.stderr.write("You must have a valid config.json.\n")
+        exit(1)
+
+    # The table name is interpolated into the SQL text, so restrict its alphabet.
+    if not re.match("^[a-zA-Z0-9_-]+$", conf['db']['table']):
+        sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
+        exit(1)
+
+    try:
+        searchconf_file = open(args.config, "r")
+    except IOError:
+        sys.stderr.write("The specified configuration file doesn't exist.\n")
+        exit(1)
+
+    queries = searchconf_file.read().splitlines()
+    searchconf_file.close()
+
+    db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
+    c = db.cursor()
+
+    releases = []
+
+    for query in queries:
+        # Skip blank lines; unpacking an empty shlex result would raise ValueError.
+        if not query.strip():
+            continue
+
+        # Three shell-quoted fields per line; "-" disables the title/section filter.
+        title, section, target = shlex.split(query)
+
+        fields = []
+        values = []
+
+        if title != "-":
+            fields.append("`release` LIKE ?")
+            values.append("%" + title + "%")
+
+        if section != "-":
+            fields.append("`section` LIKE ?")
+            values.append("%" + section + "%")
+
+        # Only rows whose `time` is more than 86400 seconds (24 hours) in the past are selected.
+        if len(fields) == 0:
+            db_query = "SELECT `release` FROM %s WHERE `time` < (UNIX_TIMESTAMP(NOW()) - 86400)" % conf['db']['table']
+        else:
+            db_query = "SELECT `release` FROM %s WHERE %s AND `time` < (UNIX_TIMESTAMP(NOW()) - 86400)" % (conf['db']['table'], " AND ".join(fields))
+
+        c.execute(db_query, values)
+
+        for row in c:
+            releases.append((row[0], target))
+elif mode == "list":
+    if args.target is None:
+        sys.stderr.write("You did not specify a target directory with --target.\n")
+        exit(1)
+
+    try:
+        list_file = open(args.list, "r")
+    except IOError:
+        sys.stderr.write("The specified list file doesn't exist.\n")
+        exit(1)
+
+    # Blank lines are dropped; an empty name would pass the spider's substring check for every title.
+    releases = [(release, args.target) for release in list_file.read().splitlines() if release.strip()]
+    list_file.close()
+
+sys.stdout.write("Found %d releases.\n" % len(releases))
+
+for release in releases:
+    release_name, target_dir = release
+
+    # Try nzbindex first and fall back to binsearch when it has no match.
+    # NOTE(review): BinsearchSpider is still an empty stub in this change, so the fallback has no find() yet.
+    try:
+        spider = NzbindexSpider()
+        results = spider.find(release_name)
+    except NotFoundException:
+        try:
+            spider = BinsearchSpider()
+            results = spider.find(release_name)
+        except NotFoundException:
+            sys.stderr.write("Could not find release %s\n" % release_name)
+            continue
+
+    # Process result
+    for result in results:
+        result.show()
diff --git a/shared.py b/shared.py
new file mode 100644
index 0000000..6611001
--- /dev/null
+++ b/shared.py
@@ -0,0 +1,38 @@
+import requests, random
+
+# These are just some random useragents, you can replace these with a different list
+user_agents = [
+    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
+    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
+    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
+]
+
+class NotFoundException(Exception):
+    """Raised by a spider when a search yields no usable results."""
+    pass
+
+class ModifiedSession(requests.Session):
+    """requests.Session that sends a randomly chosen User-Agent from user_agents."""
+    def __init__(self, *args, **kwargs):
+        requests.Session.__init__(self, *args, **kwargs)
+        self.headers['user-agent'] = random.choice(user_agents)
+
+def download_file(request, target):
+    """Write the body of a response to the file at `target`.
+
+    `request` is a requests response object; nothing is written unless its
+    status code is 200. Takes no `self`: this is a module-level function and
+    callers pass only the response and the destination path.
+    """
+    if request.status_code == 200:
+        # `with` closes the file even if a write fails part-way through.
+        with open(target, "wb") as f:
+            # iter_content defaults to one byte per chunk; use larger chunks.
+            for chunk in request.iter_content(chunk_size=8192):
+                f.write(chunk)
diff --git a/sources/__init__.py b/sources/__init__.py
index 77f1e4a..8b13789 100644
--- a/sources/__init__.py
+++ b/sources/__init__.py
@@ -1,2 +1 @@
-class NzbindexSpider(object):
-    pass
+
diff --git a/sources/binsearch.py b/sources/binsearch.py
index ed9d53d..21a57d2 100644
--- a/sources/binsearch.py
+++ b/sources/binsearch.py
@@ -1,2 +1,7 @@
+from shared import NotFoundException
+
 class BinsearchSpider(object):
     pass
+
+class BinsearchResult(object):
+    pass
diff --git a/sources/nzbindex.py b/sources/nzbindex.py
index 8b13789..509170f 100644
--- a/sources/nzbindex.py
+++ b/sources/nzbindex.py
@@ -1 +1,76 @@
+from shared import NotFoundException, ModifiedSession, download_file
+import requests, re, HTMLParser
 
+class NzbindexSpider(object):
+    """Searches nzbindex.com for a release and collects matching NZB links."""
+    def find(self, name):
+        """Return a list of NzbindexResult whose titles contain `name` (case-insensitive).
+
+        Raises NotFoundException when no result qualifies.
+        """
+        parser = HTMLParser.HTMLParser()
+        self.session = ModifiedSession()
+        # Submit the site's agreement form on this session before searching (presumably required by the site).
+        # NOTE(review): verify=False disables TLS certificate verification for these requests.
+        self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)
+
+        response = self.session.get("https://nzbindex.com/search/", params={
+            "q": name,
+            "age": "",
+            "max": "50",
+            "minage": "",
+            "sort": "agedesc",
+            "minsize": "100",
+            "maxsize": "",
+            "dq": "",
+            "poster": "",
+            "nfo": "",
+            "hasnfo": "1",
+            "complete": "1",
+            "hidespam": "1",
+            "more": "1"
+        }, verify=False)
+
+        search_results = []
+
+        # Each table row of the response is examined as one candidate result.
+        # NOTE(review): the opening "<tr" / "<label" parts were restored from the closing tags - confirm against the site's markup.
+        results = re.findall("<tr[^>]+>(.*?)<\/tr>", response.text, re.DOTALL)
+
+        for result in results:
+            if 'class="threat"' in result:
+                # Password protected or otherwise unsuitable for download
+                continue
+
+            match = re.search("<label[^>]+>(.*?)<\/label>", result, re.DOTALL)
+
+            if match is None:
+                continue
+
+            # Strip nested tags and decode HTML entities to get the plain title.
+            title = parser.unescape(re.sub("<[^>]+>", "", match.group(1)))
+
+            if name.lower() in title.lower():
+                match = re.search('https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb', result)
+
+                if match is not None:
+                    # The result keeps a reference to this spider so download() can reuse its session.
+                    search_results.append(NzbindexResult(title, match.group(0), self))
+
+        if len(search_results) == 0:
+            raise NotFoundException("No results were found.")
+
+        return search_results
+
+class NzbindexResult(object):
+    """A single search hit: its title, NZB URL and the spider that found it."""
+    def __init__(self, title, url, spider):
+        self.title = title
+        self.url = url
+        self.spider = spider
+
+    def show(self):
+        print "%s -> %s" % (self.title, self.url)
+
+    def download(self, target_path):
+        download_file(self.spider.session.get(self.url), target_path)