Code so far

11 years ago · 60825fd9fe
parent d502f11151
commit 60825fd9fe
6 changed files with 200 additions and 3 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+*.pyc
+config.json
--- a/main.py
+++ b/main.py
@ -1,4 +1,102 @@
-import re, oursql, requests
+import re, oursql, requests, sys, json, shlex, argparse

 from sources.nzbindex import NzbindexSpider
 from sources.binsearch import BinsearchSpider
+from shared import NotFoundException
+
+# Command-line interface. One release source (--config or --list) must be
+# given; the check for that happens right after parsing.
+parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
+parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
+parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
+parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
+# Stored under its own name: sharing dest="list" with --list made the IP list
+# overwrite (and be mistaken for) the release list.
+parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
+args = parser.parse_args()
+
+# Decide where the release names come from. --config takes precedence when
+# both --config and --list are supplied.
+mode = None
+if args.list is not None:
+	mode = "list"
+if args.config is not None:
+	mode = "config"
+
+if mode is None:
+	sys.stderr.write("You must specify either a configuration file or a release list.\n")
+	exit(1)
+
+# Build `releases`, a list of (release name, target directory) tuples, from
+# either the database (config mode) or a plain text file (list mode).
+if mode == "config":
+	# Database settings are always read from config.json in the working
+	# directory; --config names the file that holds the search queries.
+	try:
+		with open("config.json", "r") as conf_file:
+			conf = json.load(conf_file)
+	except (IOError, ValueError):
+		# ValueError covers a config.json that exists but is not valid JSON.
+		sys.stderr.write("You must have a valid config.json.\n")
+		exit(1)
+
+	# The table name is interpolated into the SQL text below (identifiers
+	# cannot be bound as query parameters), so restrict its character set.
+	if not re.match("^[a-zA-Z0-9_-]+$", conf['db']['table']):
+		sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
+		exit(1)
+
+	try:
+		with open(args.config, "r") as searchconf_file:
+			queries = searchconf_file.read().splitlines()
+	except IOError:
+		sys.stderr.write("The specified configuration file doesn't exist.\n")
+		exit(1)
+
+	db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
+	c = db.cursor()
+
+	releases = []
+
+	for query in queries:
+		# Blank lines carry no query; skipping them avoids an unpacking error.
+		if not query.strip():
+			continue
+
+		# Each line holds three shell-quoted fields: title, section, target.
+		# A "-" in the title or section field disables that filter.
+		try:
+			title, section, target = shlex.split(query)
+		except ValueError:
+			sys.stderr.write("Invalid query line (expected: title section target): %s\n" % query)
+			exit(1)
+
+		fields = []
+		values = []
+
+		if title != "-":
+			fields.append("`release` LIKE ?")
+			values.append("%" + title + "%")
+
+		if section != "-":
+			fields.append("`section` LIKE ?")
+			values.append("%" + section + "%")
+
+		# Always applied: only rows whose `time` lies more than 86400 seconds
+		# (24 hours) in the past are selected.
+		fields.append("`time` < (UNIX_TIMESTAMP(NOW()) - 86400)")
+
+		db_query = "SELECT `release` FROM %s WHERE %s" % (conf['db']['table'], " AND ".join(fields))
+
+		c.execute(db_query, values)
+
+		for row in c:
+			releases.append((row[0], target))
+elif mode == "list":
+	if args.target is None:
+		sys.stderr.write("You did not specify a target directory with --target.\n")
+		exit(1)
+
+	try:
+		with open(args.list, "r") as list_file:
+			# Blank lines are dropped; an empty string is not a usable
+			# release name to search for.
+			releases = [(release, args.target) for release in list_file.read().splitlines() if release.strip()]
+	except IOError:
+		sys.stderr.write("The specified list file doesn't exist.\n")
+		exit(1)
+
+sys.stdout.write("Found %d releases.\n" % len(releases))
+
+# Query the sources in order for every release; the first source that does
+# not raise NotFoundException supplies the results.
+for release_name, target_dir in releases:
+	for spider_class in (NzbindexSpider, BinsearchSpider):
+		try:
+			spider = spider_class()
+			results = spider.find(release_name)
+		except NotFoundException:
+			continue
+		break
+	else:
+		# Every source reported "not found" for this release.
+		sys.stderr.write("Could not find release %s\n" % release_name)
+		continue
+
+	# Process result
+	for result in results:
+		result.show()
--- a/shared.py
+++ b/shared.py
@ -0,0 +1,31 @@
+import requests, random
+
+# These are just some random user agents; you can replace them with a different list.
+user_agents = [
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
+	"Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
+	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
+]
+
+class NotFoundException(Exception):
+	"""Raised by a source spider when a search yields no usable results."""
+	pass
+	
+class ModifiedSession(requests.Session):
+	"""A requests session whose user-agent header is picked at random from `user_agents`."""
+
+	def __init__(self, *args, **kwargs):
+		super(ModifiedSession, self).__init__(*args, **kwargs)
+		# The agent is chosen once, when the session is created.
+		chosen_agent = random.choice(user_agents)
+		self.headers['user-agent'] = chosen_agent
+	
+def download_file(request, target):
+	"""Save the body of an HTTP response to a file.
+
+	request -- response object exposing `status_code` and `iter_content()`
+	target  -- path of the file to write; an existing file is overwritten
+
+	Returns True when the body was written, or False when the status code
+	was not 200 (nothing is written in that case).
+
+	This is a module-level function, so it takes no `self`; callers pass
+	just the response and the target path.
+	"""
+	if request.status_code != 200:
+		return False
+
+	# `with` closes the file even when the transfer fails part-way through.
+	with open(target, "wb") as f:
+		# Explicit chunk size: iter_content() defaults to 1-byte chunks.
+		for chunk in request.iter_content(8192):
+			f.write(chunk)
+
+	return True
--- a/sources/init.py
+++ b/sources/init.py
@ -1,2 +1 @@
-class NzbindexSpider(object):
-	pass
+
--- a/sources/binsearch.py
+++ b/sources/binsearch.py
@ -1,2 +1,7 @@
+from shared import NotFoundException
+
+# Placeholder for the binsearch source; no methods are implemented yet.
 class BinsearchSpider(object):
 	pass
+	
+# Placeholder for a single binsearch search result; not implemented yet.
+class BinsearchResult(object):
+	pass
--- a/sources/nzbindex.py
+++ b/sources/nzbindex.py
@ -1 +1,63 @@
+from shared import NotFoundException, ModifiedSession, download_file
+import requests, re, HTMLParser

+class NzbindexSpider(object):
+	"""Searches nzbindex.com for NZB files that match a release name."""
+
+	def find(self, name):
+		"""Return a list of NzbindexResult objects whose title contains `name`.
+
+		The title comparison is a case-insensitive substring test. Result
+		rows marked with class="threat" are skipped, as are rows without a
+		title label or without a download link.
+
+		Raises NotFoundException when nothing matches.
+		"""
+		parser = HTMLParser.HTMLParser()
+		self.session = ModifiedSession()
+		# NOTE(review): verify=False disables TLS certificate verification
+		# for these requests - confirm that this is intentional.
+		# The agreement form is submitted before searching; presumably the
+		# site requires it for the session - verify.
+		self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)
+
+		response = self.session.get("https://nzbindex.com/search/", params={
+			"q": name,
+			"age": "",
+			"max": "50",
+			"minage": "",
+			"sort": "agedesc",
+			"minsize": "100",
+			"maxsize": "",
+			"dq": "",
+			"poster": "",
+			"nfo": "",
+			"hasnfo": "1",
+			"complete": "1",
+			"hidespam": "1",
+			"more": "1"
+		}, verify=False)
+
+		search_results = []
+		# Lowercased once here instead of once per row.
+		wanted = name.lower()
+
+		rows = re.findall(r"<tr[^>]+>(.*?)<\/tr>", response.text, re.DOTALL)
+
+		for row in rows:
+			if 'class="threat"' in row:
+				# Password protected or otherwise unsuitable for download
+				continue
+
+			label = re.search(r"<label[^>]+>(.*?)<\/label>", row, re.DOTALL)
+
+			if label is None:
+				continue
+
+			# Strip nested tags from the label, then decode HTML entities.
+			title = parser.unescape(re.sub("<[^>]+>", "", label.group(1)))
+
+			if wanted not in title.lower():
+				continue
+
+			link = re.search(r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb', row)
+
+			if link is not None:
+				# NzbindexResult takes the spider as its third argument so
+				# that download() can reuse this session.
+				search_results.append(NzbindexResult(title, link.group(0), self))
+
+		if len(search_results) == 0:
+			raise NotFoundException("No results were found.")
+
+		return search_results
+class NzbindexResult(object):
+	"""One downloadable hit from an nzbindex search."""
+
+	def __init__(self, title, url, spider):
+		# The spider is kept so download() can go through its HTTP session.
+		self.spider = spider
+		self.url = url
+		self.title = title
+
+	def show(self):
+		"""Print the result as "<title> -> <url>"."""
+		summary = "%s -> %s" % (self.title, self.url)
+		print summary
+
+	def download(self, target_path):
+		"""Fetch the NZB through the spider's session and pass it to download_file."""
+		response = self.spider.session.get(self.url)
+		download_file(response, target_path)