Code so far

11 years ago · 60825fd9fe
parent d502f11151
commit 60825fd9fe
6 changed files with 200 additions and 3 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
 *.pyc
 config.json
--- a/main.py
+++ b/main.py
@ -1,4 +1,102 @@
-import re, oursql, requests
+import re, oursql, requests, sys, json, shlex, argparse
 from sources.nzbindex import NzbindexSpider
 from sources.binsearch import BinsearchSpider
 from shared import NotFoundException
 # Command-line interface. Exactly one release source is required: --config
 # (match queries against the database) or --list (plain file of release names).
 parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
 parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
 parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
 parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
 # --iplist needs its own destination: it used to share dest="list" with --list,
 # so passing it overwrote the release list path and wrongly selected list mode.
 parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
 args = parser.parse_args()
 # --config takes precedence when both sources are given.
 if args.config is not None:
 	mode = "config"
 elif args.list is not None:
 	mode = "list"
 else:
 	sys.stderr.write("You must specify either a configuration file or a release list.\n")
 	# sys.exit instead of the site-provided exit(), which is not guaranteed to exist
 	sys.exit(1)
 # Build `releases`, a list of (release name, target directory) tuples, from
 # either the database (config mode) or a plain text file (list mode).
 if mode == "config":
 	# Database settings are always read from config.json in the working directory.
 	try:
 		with open("config.json", "r") as conf_file:
 			conf = json.load(conf_file)
 	except (IOError, ValueError):
 		# ValueError covers a config.json that exists but is not valid JSON
 		sys.stderr.write("You must have a valid config.json.\n")
 		sys.exit(1)
 	table = conf['db']['table']
 	# The table name is interpolated into the SQL text, so it is restricted to a
 	# safe character set; \Z (unlike $) also rejects a trailing newline.
 	if not re.match(r"^[a-zA-Z0-9_-]+\Z", table):
 		sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
 		sys.exit(1)
 	try:
 		with open(args.config, "r") as searchconf_file:
 			queries = searchconf_file.read().splitlines()
 	except IOError:
 		sys.stderr.write("The specified configuration file doesn't exist.\n")
 		sys.exit(1)
 	db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
 	c = db.cursor()
 	releases = []
 	for query in queries:
 		if not query.strip():
 			# Tolerate blank lines in the search configuration
 			continue
 		# Each line holds three shell-style tokens: title, section, target directory.
 		parts = shlex.split(query)
 		if len(parts) != 3:
 			sys.stderr.write("Invalid query line (expected: title section target): %s\n" % query)
 			sys.exit(1)
 		title, section, target = parts
 		# "-" means "do not filter on this column"
 		fields = []
 		values = []
 		if title != "-":
 			fields.append("`release` LIKE ?")
 			values.append("%" + title + "%")
 		if section != "-":
 			fields.append("`section` LIKE ?")
 			values.append("%" + section + "%")
 		# Always restrict to rows whose `time` lies more than 86400 seconds in the past.
 		fields.append("`time` < (UNIX_TIMESTAMP(NOW()) - 86400)")
 		# Backticks keep table names containing "-" valid; the check above
 		# guarantees the name itself cannot contain a backtick.
 		db_query = "SELECT `release` FROM `%s` WHERE %s" % (table, " AND ".join(fields))
 		c.execute(db_query, values)
 		releases.extend((row[0], target) for row in c)
 elif mode == "list":
 	if args.target is None:
 		sys.stderr.write("You did not specify a target directory with --target.\n")
 		sys.exit(1)
 	try:
 		with open(args.list, "r") as list_file:
 			release_names = list_file.read().splitlines()
 	except IOError:
 		sys.stderr.write("The specified list file doesn't exist.\n")
 		sys.exit(1)
 	# Blank lines are skipped so they do not turn into searches for an empty name.
 	releases = [(release, args.target) for release in release_names if release.strip()]
 sys.stdout.write("Found %d releases.\n" % len(releases))
 # Look every release up on each source in order of preference and show the
 # hits of the first source that finds it. target_dir is not used yet.
 for release_name, target_dir in releases:
 	results = None
 	for spider_class in (NzbindexSpider, BinsearchSpider):
 		try:
 			spider = spider_class()
 			results = spider.find(release_name)
 		except NotFoundException:
 			# This source has nothing; fall through to the next one
 			continue
 		break
 	if results is None:
 		sys.stderr.write("Could not find release %s\n" % release_name)
 		continue
 	# Process result
 	for result in results:
 		result.show()
--- a/shared.py
+++ b/shared.py
@ -0,0 +1,31 @@
 import requests, random
 # Pool of browser User-Agent strings; ModifiedSession picks one at random for
 # each new session. The entries are arbitrary and can be replaced with a different list.
 user_agents = [
 	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
 	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
 	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
 	"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
 	"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
 	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
 	"Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
 	"Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
 	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
 ]
 class NotFoundException(Exception):
 	"""Raised by a source spider when a search yields no usable result."""
 class ModifiedSession(requests.Session):
 	"""A requests session whose User-Agent header is picked at random."""
 	def __init__(self, *args, **kwargs):
 		super(ModifiedSession, self).__init__(*args, **kwargs)
 		# One entry of the module-level user_agents list, fixed for this session
 		chosen_agent = random.choice(user_agents)
 		self.headers['user-agent'] = chosen_agent
 def download_file(self, request, target=None):
 	"""Save the body of a successful HTTP response to a file.
 	May be called as download_file(response, path) or, for backward
 	compatibility with the original three-parameter signature, as
 	download_file(anything, response, path); the first argument is then ignored.
 	`request` is an object exposing `status_code` and `iter_content()`
 	(a requests response); `target` is the path to write.
 	Returns True when the file was written, False when the status code was
 	not 200 (nothing is written in that case).
 	"""
 	if target is None:
 		# Two-argument call: the values arrived one slot to the left.
 		request, target = self, request
 	if request.status_code != 200:
 		return False
 	# The with block closes the file even if a write fails midway.
 	with open(target, "wb") as f:
 		# iter_content() defaults to 1-byte chunks; read in larger pieces.
 		for chunk in request.iter_content(chunk_size=8192):
 			if chunk:
 				f.write(chunk)
 	return True
--- a/sources/init.py
+++ b/sources/init.py
@ -1,2 +1 @@
-class NzbindexSpider(object):
+
 	pass
--- a/sources/binsearch.py
+++ b/sources/binsearch.py
@ -1,2 +1,7 @@
 from shared import NotFoundException
 class BinsearchSpider(object):
 	"""Placeholder for the Binsearch source; searching is not implemented yet."""
 	def find(self, name):
 		"""Report that `name` was not found.
 		main.py calls find() on this class as a fallback and only handles
 		NotFoundException, so the stub raises that instead of failing with
 		AttributeError.
 		"""
 		raise NotFoundException("Binsearch search is not implemented yet.")
 class BinsearchResult(object):
 	"""Placeholder for a Binsearch search result; it has no behaviour yet."""
--- a/sources/nzbindex.py
+++ b/sources/nzbindex.py
@ -1 +1,63 @@
 from shared import NotFoundException, ModifiedSession, download_file
 import requests, re, HTMLParser
 class NzbindexSpider(object):
 	"""Search nzbindex.com for NZB files matching a release name."""
 	# Patterns used to scrape the HTML result page (raw strings, compiled once).
 	_ROW_PATTERN = re.compile(r"<tr[^>]+>(.*?)</tr>", re.DOTALL)
 	_LABEL_PATTERN = re.compile(r"<label[^>]+>(.*?)</label>", re.DOTALL)
 	_TAG_PATTERN = re.compile(r"<[^>]+>")
 	_DOWNLOAD_PATTERN = re.compile(r'https?://nzbindex\.com/download/[^"]+\.nzb')
 	def find(self, name):
 		"""Return the NzbindexResult objects whose title contains `name`.
 		The match is case-insensitive. A new session is created and stored on
 		`self.session`. Raises NotFoundException when no result row matches.
 		"""
 		parser = HTMLParser.HTMLParser()
 		self.session = ModifiedSession()
 		# NOTE(review): verify=False turns off TLS certificate verification for
 		# both requests below - confirm this is still needed.
 		self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)
 		response = self.session.get("https://nzbindex.com/search/", params={
 			"q": name,
 			"age": "",
 			"max": "50",
 			"minage": "",
 			"sort": "agedesc",
 			"minsize": "100",
 			"maxsize": "",
 			"dq": "",
 			"poster": "",
 			"nfo": "",
 			"hasnfo": "1",
 			"complete": "1",
 			"hidespam": "1",
 			"more": "1"
 		}, verify=False)
 		search_results = []
 		for row in self._ROW_PATTERN.findall(response.text):
 			if 'class="threat"' in row:
 				# Password protected or otherwise unsuitable for download
 				continue
 			label = self._LABEL_PATTERN.search(row)
 			if label is None:
 				continue
 			# Strip markup from the label and decode HTML entities to get the title
 			title = parser.unescape(self._TAG_PATTERN.sub("", label.group(1)))
 			if name.lower() not in title.lower():
 				continue
 			link = self._DOWNLOAD_PATTERN.search(row)
 			if link is not None:
 				# Pass this spider along: NzbindexResult downloads through
 				# spider.session, and its constructor expects the spider.
 				search_results.append(NzbindexResult(title, link.group(0), self))
 		if not search_results:
 			raise NotFoundException("No results were found.")
 		return search_results
 class NzbindexResult(object):
 	"""A single NZB hit found on nzbindex.com."""
 	def __init__(self, title, url, spider=None):
 		"""Store the result title, its NZB download URL and the originating spider.
 		`spider` is optional so a result can be built for display only; it is
 		required for download(), which reuses `spider.session`.
 		"""
 		self.title = title
 		self.url = url
 		self.spider = spider
 	def show(self):
 		"""Print the result as "<title> -> <url>"."""
 		# Parenthesised single-argument form prints the same on Python 2 and 3
 		print("%s -> %s" % (self.title, self.url))
 	def download(self, target_path):
 		"""Fetch the NZB through the spider's session and save it to `target_path`.
 		Raises ValueError when the result was created without a spider.
 		"""
 		if self.spider is None:
 			raise ValueError("This result has no spider session to download with.")
 		# NOTE(review): the search requests pass verify=False but this one does
 		# not - confirm whether the download needs the same setting.
 		download_file(self.spider.session.get(self.url), target_path)