From d502f111511a1c7bfe47f240b41cd30aa2641aaf Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 00:06:38 +0200
Subject: [PATCH 01/17] Use requests

---
 README.md | 5 +++--
 main.py   | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index fea08af..82b3efb 100644
--- a/README.md
+++ b/README.md
@@ -11,8 +11,9 @@ extend. Contributions welcome.
 
 ## Installing
 
-You'll need to `pip install oursql` (this will require having the MySQL
-development libraries installed). Other than that, just run main.py.
+You'll need to `pip install oursql requests` (this will require having 
+the MySQL development libraries installed). Other than that, just run 
+main.py.
 
 ## License
 
diff --git a/main.py b/main.py
index 822e2df..31f2657 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
-import re, oursql
+import re, oursql, requests
 
 from sources.nzbindex import NzbindexSpider
 from sources.binsearch import BinsearchSpider

From 60825fd9fe61f6649637f89ad43e71eb0219f652 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 03:00:44 +0200
Subject: [PATCH 02/17] Code so far

---
 .gitignore           |   2 +
 main.py              | 100 ++++++++++++++++++++++++++++++++++++++++++-
 shared.py            |  31 ++++++++++++++
 sources/__init__.py  |   3 +-
 sources/binsearch.py |   5 +++
 sources/nzbindex.py  |  62 +++++++++++++++++++++++++++
 6 files changed, 200 insertions(+), 3 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 shared.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..936a3c8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+config.json
diff --git a/main.py b/main.py
index 31f2657..8a986bb 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,102 @@
-import re, oursql, requests
+import re, oursql, requests, sys, json, shlex, argparse
 
 from sources.nzbindex import NzbindexSpider
 from sources.binsearch import BinsearchSpider
+from shared import NotFoundException
+
+parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
+parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
+parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
+parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
+parser.add_argument("--iplist", dest="list", action="store", help="Bind every request to a random IP from a newline-delimited list")
+args = parser.parse_args()
+
+if args.config is not None:
+	mode = "config"
+elif args.list is not None:
+	mode = "list"
+else:
+	sys.stderr.write("You must specify either a configuration file or a release list.\n")
+	exit(1)
+
+if mode == "config":
+	try:
+		conf = json.load(open("config.json", "r"))
+	except IOError, e:
+		sys.stderr.write("You must have a valid config.json.\n")
+		exit(1)
+	
+	if not re.match("^[a-zA-Z0-9_-]+$", conf['db']['table']):
+		sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
+		exit(1)
+	
+	try:
+		searchconf_file = open(args.config, "r")
+	except IOError, e:
+		sys.stderr.write("The specified configuration file doesn't exist.\n")
+		exit(1)
+		
+	queries = searchconf_file.read().splitlines()
+	searchconf_file.close()
+	
+	db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
+	c = db.cursor()
+	
+	releases = []
+	
+	for query in queries:
+		title, section, target = shlex.split(query)
+		
+		fields = []
+		values = []
+		
+		if title != "-":
+			fields.append("`release` LIKE ?")
+			values.append("%" + title + "%")
+			
+		if section != "-":
+			fields.append("`section` LIKE ?")
+			values.append("%" + section + "%")
+		
+		if len(fields) == 0:
+			db_query = "SELECT `release` FROM %s WHERE `time` < (UNIX_TIMESTAMP(NOW()) - 86400)" % conf['db']['table']
+		else:
+			db_query = "SELECT `release` FROM %s WHERE %s AND `time` < (UNIX_TIMESTAMP(NOW()) - 86400)" % (conf['db']['table'], " AND ".join(fields))
+		
+		c.execute(db_query, values)
+		
+		for row in c:
+			releases.append((row[0], target))
+elif mode == "list":
+	if args.target is None:
+		sys.stderr.write("You did not specify a target directory with --target.\n")
+		exit(1)
+	
+	try:
+		list_file = open(args.list, "r")
+	except IOError, e:
+		sys.stderr.write("The specified list file doesn't exist.\n")
+		exit(1)
+	
+	releases = [(release, args.target) for release in list_file.read().splitlines()]
+	list_file.close()
+
+sys.stdout.write("Found %d releases.\n" % len(releases))
+
+for release in releases:
+	release_name, target_dir = release
+	
+	try:
+		spider = NzbindexSpider()
+		results = spider.find(release_name)
+	except NotFoundException, e:
+		try:
+			spider = BinsearchSpider()
+			results = spider.find(release_name)
+		except NotFoundException, e:
+			sys.stderr.write("Could not find release %s\n" % release_name)
+			continue
+			
+	# Process result
+	for result in results:
+		result.show()
diff --git a/shared.py b/shared.py
new file mode 100644
index 0000000..6611001
--- /dev/null
+++ b/shared.py
@@ -0,0 +1,31 @@
+import requests, random
+
+# These are just some random useragents, you can replace these with a different list
+user_agents = [
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
+	"Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
+	"Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
+	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
+]
+
+class NotFoundException(Exception):
+	pass
+	
+class ModifiedSession(requests.Session):
+	def __init__(self, *args, **kwargs):
+		requests.Session.__init__(self, *args, **kwargs)
+		self.headers['user-agent'] = random.choice(user_agents)
+	
+def download_file(self, request, target):
+	if request.status_code == 200:
+		f = open(target, "wb")
+		
+		for chunk in request.iter_content():
+			f.write(chunk)
+			
+		f.close()
diff --git a/sources/__init__.py b/sources/__init__.py
index 77f1e4a..8b13789 100644
--- a/sources/__init__.py
+++ b/sources/__init__.py
@@ -1,2 +1 @@
-class NzbindexSpider(object):
-	pass
+
diff --git a/sources/binsearch.py b/sources/binsearch.py
index ed9d53d..21a57d2 100644
--- a/sources/binsearch.py
+++ b/sources/binsearch.py
@@ -1,2 +1,7 @@
+from shared import NotFoundException
+
 class BinsearchSpider(object):
 	pass
+	
+class BinsearchResult(object):
+	pass
diff --git a/sources/nzbindex.py b/sources/nzbindex.py
index 8b13789..509170f 100644
--- a/sources/nzbindex.py
+++ b/sources/nzbindex.py
@@ -1 +1,63 @@
+from shared import NotFoundException, ModifiedSession, download_file
+import requests, re, HTMLParser
 
+class NzbindexSpider(object):
+	def find(self, name):
+		parser = HTMLParser.HTMLParser()
+		self.session = ModifiedSession()
+		self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)
+		
+		response = self.session.get("https://nzbindex.com/search/", params={
+			"q": name,
+			"age": "",
+			"max": "50",
+			"minage": "",
+			"sort": "agedesc",
+			"minsize": "100",
+			"maxsize": "",
+			"dq": "",
+			"poster": "",
+			"nfo": "",
+			"hasnfo": "1",
+			"complete": "1",
+			"hidespam": "1",
+			"more": "1"
+		}, verify=False)
+		
+		search_results = []
+		
+		results = re.findall("<tr[^>]+>(.*?)<\/tr>", response.text, re.DOTALL)
+		
+		for result in results:
+			if 'class="threat"' in result:
+				# Password protected or otherwise unsuitable for download
+				continue
+			
+			match = re.search("<label[^>]+>(.*?)<\/label>", result, re.DOTALL)
+			
+			if match is None:
+				continue
+				
+			title = parser.unescape(re.sub("<[^>]+>", "", match.group(1)))
+			
+			if name.lower() in title.lower():
+				match = re.search('https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb', result)
+				
+				if match is not None:
+					search_results.append(NzbindexResult(title, match.group(0)))
+		
+		if len(search_results) == 0:
+			raise NotFoundException("No results were found.")
+				
+		return search_results
+class NzbindexResult(object):
+	def __init__(self, title, url, spider):
+		self.title = title
+		self.url = url
+		self.spider = spider
+		
+	def show(self):
+		print "%s -> %s" % (self.title, self.url)
+		
+	def download(self, target_path):
+		download_file(self.spider.session.get(self.url), target_path)

From d14d4ea9cb3fe410547359bc86594234021bfe57 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 15:35:42 +0200
Subject: [PATCH 03/17] Add note about sanitation

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 82b3efb..70dcd2b 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,12 @@ You'll need to `pip install oursql requests` (this will require having
 the MySQL development libraries installed). Other than that, just run 
 main.py.
 
+## Notes
+
+The script will assume that all releasenames in your database are safe
+as a filename. No sanitization or conversion of the filenames will take
+place.
+
 ## License
 
 Licensed under the WTFPL or, if you take issue with that for some

From 696d4f1c6e513ff9ab2584769b2fdf939598804e Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 15:36:32 +0200
Subject: [PATCH 04/17] Fixes in NZBIndex spider

---
 sources/nzbindex.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sources/nzbindex.py b/sources/nzbindex.py
index 509170f..9d4f427 100644
--- a/sources/nzbindex.py
+++ b/sources/nzbindex.py
@@ -26,30 +26,31 @@ class NzbindexSpider(object):
 		
 		search_results = []
 		
-		results = re.findall("<tr[^>]+>(.*?)<\/tr>", response.text, re.DOTALL)
+		results = re.findall("<tr[^>]*>(.*?)<\/tr>", response.text, re.DOTALL)
 		
 		for result in results:
 			if 'class="threat"' in result:
 				# Password protected or otherwise unsuitable for download
 				continue
 			
-			match = re.search("<label[^>]+>(.*?)<\/label>", result, re.DOTALL)
+			match = re.search("<label[^>]*>(.*?)<\/label>", result, re.DOTALL)
 			
 			if match is None:
 				continue
 				
-			title = parser.unescape(re.sub("<[^>]+>", "", match.group(1)))
+			title = parser.unescape(re.sub("<[^>]*>", "", match.group(1)))
 			
 			if name.lower() in title.lower():
 				match = re.search('https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb', result)
 				
 				if match is not None:
-					search_results.append(NzbindexResult(title, match.group(0)))
+					search_results.append(NzbindexResult(title, match.group(0), self))
 		
 		if len(search_results) == 0:
 			raise NotFoundException("No results were found.")
 				
 		return search_results
+		
 class NzbindexResult(object):
 	def __init__(self, title, url, spider):
 		self.title = title

From 0917f06de7d39515d691159b71235cbad49ef2d8 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 15:36:52 +0200
Subject: [PATCH 05/17] Implement BinSearch spider

---
 sources/binsearch.py | 80 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 3 deletions(-)

diff --git a/sources/binsearch.py b/sources/binsearch.py
index 21a57d2..0402018 100644
--- a/sources/binsearch.py
+++ b/sources/binsearch.py
@@ -1,7 +1,81 @@
-from shared import NotFoundException
+from shared import NotFoundException, ModifiedSession, download_file
+import requests, re, HTMLParser
 
 class BinsearchSpider(object):
-	pass
+	def find(self, name):
+		parser = HTMLParser.HTMLParser()
+		self.session = ModifiedSession()
+		
+		response = self.session.get("https://binsearch.info/index.php", params={
+			"q": name,
+			"m": "",
+			"adv_age": "600",
+			"max": "100",
+			"adv_g": "",
+			"adv_sort": "date",
+			"minsize": "100",
+			"maxsize": "",
+			"adv_col": "on",
+			"adv_nfo": "on",
+			"font": "",
+			"postdate": ""
+		}, verify=False)
+		
+		search_results = []
+		
+		# Nice try, corrupting your HTML to deter scrapers. Not going to stop me, though.
+		results = re.findall('<tr[^>]+>(.*?)<a href="browse\.php', response.text, re.DOTALL)
+		
+		for result in results:
+			if 'requires password' in result:
+				# Password protected
+				continue
+			
+			match = re.search('<span[^>]*class="s"[^>]*>(.*?)<\/span>', result, re.DOTALL)
+			
+			if match is None:
+				continue
+				
+			title = parser.unescape(re.sub("<[^>]+>", "", match.group(1)))
+			
+			if name.lower() in title.lower():
+				match = re.search('<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>', result)
+				
+				if match is not None:
+					search_results.append(BinsearchResult(name, title, match.group(1), self))
+		
+		if len(search_results) == 0:
+			raise NotFoundException("No results were found.")
+				
+		return search_results
 	
 class BinsearchResult(object):
-	pass
+	def __init__(self, name, title, id_, spider):
+		self.name = name
+		self.title = title
+		self.id_ = id_
+		self.spider = spider
+	
+	def show(self):
+		print "%s -> %s (%s)" % (self.title, self.id_, self.name)
+	
+	def download(self, target_path):
+		data_dict = {"action": "nzb"}
+		data_dict[self.id_] = "on"
+		
+		response = self.spider.session.post("https://www.binsearch.info/fcgi/nzb.fcgi", params={
+			"q": self.name,
+			"m": "",
+			"adv_age": "600",
+			"max": "100",
+			"adv_g": "",
+			"adv_sort": "date",
+			"minsize": "100",
+			"maxsize": "",
+			"adv_col": "on",
+			"adv_nfo": "on",
+			"font": "",
+			"postdate": ""
+		}, data=data_dict)
+		
+		download_file(response, target_path)

From c81ffda57512dc90e86eed3840c9a837d94b48d2 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 15:37:04 +0200
Subject: [PATCH 06/17] Finish main script

---
 main.py   | 34 +++++++++++++++++++++++++++++++---
 shared.py |  2 +-
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/main.py b/main.py
index 8a986bb..d7d0032 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
-import re, oursql, requests, sys, json, shlex, argparse
+import re, oursql, requests, sys, json, shlex, argparse, os
 
 from sources.nzbindex import NzbindexSpider
 from sources.binsearch import BinsearchSpider
@@ -83,8 +83,25 @@ elif mode == "list":
 
 sys.stdout.write("Found %d releases.\n" % len(releases))
 
+downloaded = 0
+skipped = 0
+errors = 0
+notfound = 0
+
 for release in releases:
 	release_name, target_dir = release
+	target_path = os.path.join(target_dir, "%s.nzb" % release_name)
+	
+	if os.path.exists(target_path):
+		# This NZB was already downloaded.
+		skipped += 1
+		continue
+	
+	try:
+		os.makedirs(target_dir)
+	except OSError, e:
+		# Target directory already exists
+		pass
 	
 	try:
 		spider = NzbindexSpider()
@@ -95,8 +112,19 @@ for release in releases:
 			results = spider.find(release_name)
 		except NotFoundException, e:
 			sys.stderr.write("Could not find release %s\n" % release_name)
+			notfound += 1
 			continue
 			
 	# Process result
-	for result in results:
-		result.show()
+	result = results[1]
+	
+	try:
+		result.download(target_path)
+	except Exception, e:
+		errors += 1
+		sys.stderr.write("Downloading NZB for %s failed: %s\n" % (release_name, repr(e)))
+		
+	sys.stdout.write("Downloaded NZB for %s.\n" % release_name)
+	downloaded += 1
+
+sys.stdout.write("Finished. %d downloaded, %d skipped, %d errors and %d not found.\n" % (downloaded, skipped, errors, notfound))
diff --git a/shared.py b/shared.py
index 6611001..86af281 100644
--- a/shared.py
+++ b/shared.py
@@ -21,7 +21,7 @@ class ModifiedSession(requests.Session):
 		requests.Session.__init__(self, *args, **kwargs)
 		self.headers['user-agent'] = random.choice(user_agents)
 	
-def download_file(self, request, target):
+def download_file(request, target):
 	if request.status_code == 200:
 		f = open(target, "wb")
 		

From ab2f4e24c3c8c7c8594bab3a08adc97fc25aaaf6 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 15:47:31 +0200
Subject: [PATCH 07/17] Add usage instructions

---
 README.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/README.md b/README.md
index 70dcd2b..16b7fc4 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,53 @@ You'll need to `pip install oursql requests` (this will require having
 the MySQL development libraries installed). Other than that, just run 
 main.py.
 
+## Usage
+
+You can use nzbspider with either a release list or a configuration 
+file.
+
+### Release list
+
+This is a text file, specified with the `--list` parameter, that
+contains a newline-delimited list of release names to search for. You
+will need to use the `--target` parameter to specify what directory to
+download the NZBs to.
+
+### Configuration file
+
+This is a text file using a specific configuration syntax to select
+specific releases from a pre-filled MySQl database, to search for. To
+use this mode, you will need to copy config.json.example to config.json
+and change the database details to match yours. A (basic) database
+schema is included. Only results that are at least 24 hours old will be
+matched, regardless of your configuration.
+
+The configuration file format is as follows:
+
+* Newline-delimited, a new predicate on every line.
+* Three whitespace-delimited fields: release name, section, and target 
+  directory.
+* Enter `-` for either or both of the first two fields to match regardless
+  of the release name or section (depending on which you fill in as `-`).
+* The `%` character is used to denote a multi-character wildcard
+  anywhere in the first two fields.
+* The first two fields are enclosed in wildcard characters by default.
+* The target directory does not have to exist; it will be created if it
+  doesn't.
+* You must enclose a field value in `"` quotes if it contains a space.
+  
+An example configuration file (the real configuration format doesn't
+allow comments, so don't copy this verbatim!):
+
+	- MP3 ./mp3s             # Will select everything in section 'MP3'
+	- - ./everything         # Will select absolutely everything
+	IMMERSE - ./immerse      # Will select everything labeled 'IMMERSE'
+	Mad.Men%720p - ./madmen  # Will select every 720p episode of Mad Men
+	
+Note that these searches are run against your own database, not directly
+against the NZB indexing sites! You'll still need a list of valid 
+release names pre-filled in your database.
+
 ## Notes
 
 The script will assume that all releasenames in your database are safe

From 23d1a08d0b74a1d157d05df3644dd44c53ccd77e Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 15:49:05 +0200
Subject: [PATCH 08/17] Modifications to usage instructions

---
 README.md | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 16b7fc4..3437804 100644
--- a/README.md
+++ b/README.md
@@ -30,11 +30,14 @@ download the NZBs to.
 ### Configuration file
 
 This is a text file using a specific configuration syntax to select
-specific releases from a pre-filled MySQl database, to search for. To
-use this mode, you will need to copy config.json.example to config.json
-and change the database details to match yours. A (basic) database
-schema is included. Only results that are at least 24 hours old will be
-matched, regardless of your configuration.
+specific releases from a pre-filled MySQL database, to search for. Use
+the `--config` parameter to specify the path of the configuration file
+you wish to use.
+
+To use this mode, you will need to copy config.json.example to 
+config.json and change the database details to match yours. A (basic) 
+database schema is included. Only results that are at least 24 hours old
+will be matched, regardless of your configuration.
 
 The configuration file format is as follows:
 

From 2eb0bad5793aae4e40f20b2c28f840ee595ac6a0 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 15:57:39 +0200
Subject: [PATCH 09/17] Include database structure

---
 structure.sql | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 structure.sql

diff --git a/structure.sql b/structure.sql
new file mode 100644
index 0000000..ea5efd5
--- /dev/null
+++ b/structure.sql
@@ -0,0 +1,11 @@
+SET SQL_MODE="NO_AUTO_VALUE_ON_ZERO";
+SET time_zone = "+00:00";
+
+CREATE TABLE IF NOT EXISTS `releases` (
+  `releaseid` int(11) NOT NULL AUTO_INCREMENT,
+  `time` int(11) NOT NULL,
+  `section` varchar(50) COLLATE utf8_unicode_ci NOT NULL,
+  `release` varchar(255) COLLATE utf8_unicode_ci NOT NULL,
+  PRIMARY KEY (`releaseid`),
+  UNIQUE KEY `release` (`release`)
+) ENGINE=MyISAM  DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

From efb1efdb17e291a72d4a762c4ff7059246ea3a65 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 15:58:11 +0200
Subject: [PATCH 10/17] Add example configuration

---
 config.json.example | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 config.json.example

diff --git a/config.json.example b/config.json.example
new file mode 100644
index 0000000..0bb93dd
--- /dev/null
+++ b/config.json.example
@@ -0,0 +1,9 @@
+{
+	"db": {
+		"host": "localhost",
+		"user": "nzbspider",
+		"pass": "sekrit",
+		"db": "nzbspider",
+		"table": "releases"
+	}
+}

From 73b1881e86c4eea7be799bea1ada06d0e55dd160 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 18:31:18 +0200
Subject: [PATCH 11/17] Don't retry NZBs that can't be found straight away, and
 limit results to last 250

---
 main.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/main.py b/main.py
index d7d0032..9317bd4 100644
--- a/main.py
+++ b/main.py
@@ -59,9 +59,9 @@ if mode == "config":
 			values.append("%" + section + "%")
 		
 		if len(fields) == 0:
-			db_query = "SELECT `release` FROM %s WHERE `time` < (UNIX_TIMESTAMP(NOW()) - 86400)" % conf['db']['table']
+			db_query = "SELECT `release` FROM %s WHERE `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT 250" % conf['db']['table']
 		else:
-			db_query = "SELECT `release` FROM %s WHERE %s AND `time` < (UNIX_TIMESTAMP(NOW()) - 86400)" % (conf['db']['table'], " AND ".join(fields))
+			db_query = "SELECT `release` FROM %s WHERE %s AND `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT 250" % (conf['db']['table'], " AND ".join(fields))
 		
 		c.execute(db_query, values)
 		
@@ -88,6 +88,8 @@ skipped = 0
 errors = 0
 notfound = 0
 
+notfound_list = []
+
 for release in releases:
 	release_name, target_dir = release
 	target_path = os.path.join(target_dir, "%s.nzb" % release_name)
@@ -96,6 +98,11 @@ for release in releases:
 		# This NZB was already downloaded.
 		skipped += 1
 		continue
+		
+	if release_name in notfound_list:
+		# This NZB couldn't be found before
+		notfound += 1
+		continue
 	
 	try:
 		os.makedirs(target_dir)
@@ -112,6 +119,7 @@ for release in releases:
 			results = spider.find(release_name)
 		except NotFoundException, e:
 			sys.stderr.write("Could not find release %s\n" % release_name)
+			notfound_list.append(release_name)
 			notfound += 1
 			continue
 			

From cacbd735b77cae85df36d7ab50ef685acf43a277 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 18:37:25 +0200
Subject: [PATCH 12/17] Use the first search result instead of the second

---
 main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.py b/main.py
index 9317bd4..561fdb4 100644
--- a/main.py
+++ b/main.py
@@ -124,7 +124,7 @@ for release in releases:
 			continue
 			
 	# Process result
-	result = results[1]
+	result = results[0]
 	
 	try:
 		result.download(target_path)

From e7e6cba90ff98dca337e73e73e137ae283b13385 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 19:39:31 +0200
Subject: [PATCH 13/17] Monkeypatch in support for binding sessions to random
 IPs using an IP list

---
 main.py              | 14 ++++++++++----
 shared.py            | 39 ++++++++++++++++++++++++++++++++++++++-
 sources/binsearch.py |  5 ++++-
 sources/nzbindex.py  |  5 ++++-
 4 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/main.py b/main.py
index 561fdb4..49e5f3d 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
-import re, oursql, requests, sys, json, shlex, argparse, os
+import re, oursql, requests, sys, json, shlex, argparse, os, random
 
 from sources.nzbindex import NzbindexSpider
 from sources.binsearch import BinsearchSpider
@@ -8,7 +8,7 @@ parser = argparse.ArgumentParser(description="Automatically download NZBs for re
 parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
 parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
 parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
-parser.add_argument("--iplist", dest="list", action="store", help="Bind every request to a random IP from a newline-delimited list")
+parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
 args = parser.parse_args()
 
 if args.config is not None:
@@ -18,6 +18,12 @@ elif args.list is not None:
 else:
 	sys.stderr.write("You must specify either a configuration file or a release list.\n")
 	exit(1)
+	
+if args.iplist is not None:
+	iplist_file = open(args.iplist, "r")
+	iplist = iplist_file.read().splitlines()
+else:
+	iplist = [""]
 
 if mode == "config":
 	try:
@@ -111,11 +117,11 @@ for release in releases:
 		pass
 	
 	try:
-		spider = NzbindexSpider()
+		spider = NzbindexSpider(random.choice(iplist))
 		results = spider.find(release_name)
 	except NotFoundException, e:
 		try:
-			spider = BinsearchSpider()
+			spider = BinsearchSpider(random.choice(iplist))
 			results = spider.find(release_name)
 		except NotFoundException, e:
 			sys.stderr.write("Could not find release %s\n" % release_name)
diff --git a/shared.py b/shared.py
index 86af281..3daa11e 100644
--- a/shared.py
+++ b/shared.py
@@ -1,4 +1,4 @@
-import requests, random
+import requests, random, socket
 
 # These are just some random useragents, you can replace these with a different list
 user_agents = [
@@ -16,10 +16,47 @@ user_agents = [
 class NotFoundException(Exception):
 	pass
 	
+# Very nasty monkeypatching ahead!
+socket.real_create_connection = socket.create_connection
+
 class ModifiedSession(requests.Session):
 	def __init__(self, *args, **kwargs):
+		try:
+			self.bound_ip = kwargs['bound_ip']
+			del kwargs['bound_ip']
+		except KeyError, e:
+			self.bound_ip = ""
+			
 		requests.Session.__init__(self, *args, **kwargs)
 		self.headers['user-agent'] = random.choice(user_agents)
+		
+	def patch_socket(self):
+		socket.create_connection = get_patched_func(self.bound_ip)
+	
+	def unpatch_socket(self):
+		socket.create_connection = socket.real_create_connection
+	
+	def get(self, *args, **kwargs):
+		self.patch_socket()
+		response = requests.Session.get(self, *args, **kwargs)
+		self.unpatch_socket()
+		return response
+		
+	def post(self, *args, **kwargs):
+		self.patch_socket()
+		response = requests.Session.get(self, *args, **kwargs)
+		self.unpatch_socket()
+		return response
+
+def get_patched_func(bind_addr):
+	def set_src_addr(*args):
+		address, timeout = args[0], args[1]
+		source_address = (bind_addr, 0)
+		return socket.real_create_connection(address, timeout, source_address)
+	return set_src_addr
+	
+# You're looking at duct tape and tie-wraps. It's like your local Home
+# Depot, except in Python.
 	
 def download_file(request, target):
 	if request.status_code == 200:
diff --git a/sources/binsearch.py b/sources/binsearch.py
index 0402018..61d1b38 100644
--- a/sources/binsearch.py
+++ b/sources/binsearch.py
@@ -2,9 +2,12 @@ from shared import NotFoundException, ModifiedSession, download_file
 import requests, re, HTMLParser
 
 class BinsearchSpider(object):
+	def __init__(self, bound_ip):
+		self.bound_ip = bound_ip
+		
 	def find(self, name):
 		parser = HTMLParser.HTMLParser()
-		self.session = ModifiedSession()
+		self.session = ModifiedSession(bound_ip=self.bound_ip)
 		
 		response = self.session.get("https://binsearch.info/index.php", params={
 			"q": name,
diff --git a/sources/nzbindex.py b/sources/nzbindex.py
index 9d4f427..6a1f4ff 100644
--- a/sources/nzbindex.py
+++ b/sources/nzbindex.py
@@ -2,9 +2,12 @@ from shared import NotFoundException, ModifiedSession, download_file
 import requests, re, HTMLParser
 
 class NzbindexSpider(object):
+	def __init__(self, bound_ip):
+		self.bound_ip = bound_ip
+		
 	def find(self, name):
 		parser = HTMLParser.HTMLParser()
-		self.session = ModifiedSession()
+		self.session = ModifiedSession(bound_ip=self.bound_ip)
 		self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)
 		
 		response = self.session.get("https://nzbindex.com/search/", params={

From eee7a0d25332f3d1f87e9eadbfb8554c5f9e4c77 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 19:43:49 +0200
Subject: [PATCH 14/17] Update docs for --iplist

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3437804..a3e778b 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,10 @@ main.py.
 ## Usage
 
 You can use nzbspider with either a release list or a configuration 
-file.
+file. Using `--iplist` you can specify a newline-delimited file that
+contains all the available IPs on your machine. nzbspider will randomly
+pick one for every search query. If not specified, the OS default is 
+used.
 
 ### Release list
 

From 0644f182013b04aae89b10f22a7859282b2a3f46 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 20:13:27 +0200
Subject: [PATCH 15/17] Fix bugs, set correct headers, etc.

---
 main.py              |  1 +
 shared.py            | 10 ++++++++--
 sources/binsearch.py | 13 +++++++++----
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/main.py b/main.py
index 49e5f3d..f9f78e9 100644
--- a/main.py
+++ b/main.py
@@ -137,6 +137,7 @@ for release in releases:
 	except Exception, e:
 		errors += 1
 		sys.stderr.write("Downloading NZB for %s failed: %s\n" % (release_name, repr(e)))
+		continue
 		
 	sys.stdout.write("Downloaded NZB for %s.\n" % release_name)
 	downloaded += 1
diff --git a/shared.py b/shared.py
index 3daa11e..4bbce69 100644
--- a/shared.py
+++ b/shared.py
@@ -11,11 +11,15 @@ user_agents = [
 	"Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
 	"Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
 	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
+	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.63 Safari/537.36"
 ]
 
 class NotFoundException(Exception):
 	pass
 	
+class DownloadException(Exception):
+	pass
+	
 # Very nasty monkeypatching ahead!
 socket.real_create_connection = socket.create_connection
 
@@ -28,7 +32,7 @@ class ModifiedSession(requests.Session):
 			self.bound_ip = ""
 			
 		requests.Session.__init__(self, *args, **kwargs)
-		self.headers['user-agent'] = random.choice(user_agents)
+		self.headers['User-Agent'] = random.choice(user_agents)
 		
 	def patch_socket(self):
 		socket.create_connection = get_patched_func(self.bound_ip)
@@ -44,7 +48,7 @@ class ModifiedSession(requests.Session):
 		
 	def post(self, *args, **kwargs):
 		self.patch_socket()
-		response = requests.Session.get(self, *args, **kwargs)
+		response = requests.Session.post(self, *args, **kwargs)
 		self.unpatch_socket()
 		return response
 
@@ -66,3 +70,5 @@ def download_file(request, target):
 			f.write(chunk)
 			
 		f.close()
+	else:
+		raise DownloadException("Status code was %s" % request.status_code)
diff --git a/sources/binsearch.py b/sources/binsearch.py
index 61d1b38..932d720 100644
--- a/sources/binsearch.py
+++ b/sources/binsearch.py
@@ -21,7 +21,8 @@ class BinsearchSpider(object):
 			"adv_col": "on",
 			"adv_nfo": "on",
 			"font": "",
-			"postdate": ""
+			"postdate": "",
+			"server": ""
 		}, verify=False)
 		
 		search_results = []
@@ -45,7 +46,7 @@ class BinsearchSpider(object):
 				match = re.search('<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>', result)
 				
 				if match is not None:
-					search_results.append(BinsearchResult(name, title, match.group(1), self))
+					search_results.append(BinsearchResult(name, title, match.group(1), self, response.url))
 		
 		if len(search_results) == 0:
 			raise NotFoundException("No results were found.")
@@ -53,11 +54,12 @@ class BinsearchSpider(object):
 		return search_results
 	
 class BinsearchResult(object):
-	def __init__(self, name, title, id_, spider):
+	def __init__(self, name, title, id_, spider, searchurl):
 		self.name = name
 		self.title = title
 		self.id_ = id_
 		self.spider = spider
+		self.searchurl = searchurl
 	
 	def show(self):
 		print "%s -> %s (%s)" % (self.title, self.id_, self.name)
@@ -66,6 +68,8 @@ class BinsearchResult(object):
 		data_dict = {"action": "nzb"}
 		data_dict[self.id_] = "on"
 		
+		self.spider.session.headers['Referer'] = self.searchurl
+		
 		response = self.spider.session.post("https://www.binsearch.info/fcgi/nzb.fcgi", params={
 			"q": self.name,
 			"m": "",
@@ -78,7 +82,8 @@ class BinsearchResult(object):
 			"adv_col": "on",
 			"adv_nfo": "on",
 			"font": "",
-			"postdate": ""
+			"postdate": "",
+			"server": ""
 		}, data=data_dict)
 		
 		download_file(response, target_path)

From e85eeb3bb23919f2ac434d44a917ad3b4aa8e881 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 21:15:06 +0200
Subject: [PATCH 16/17] Let a custom limit be specified as parameter

---
 README.md | 7 ++++++-
 main.py   | 7 +++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a3e778b..99fdd87 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,9 @@ main.py.
 ## Usage
 
 You can use nzbspider with either a release list or a configuration 
-file. Using `--iplist` you can specify a newline-delimited file that
+file. 
+
+Using `--iplist` you can specify a newline-delimited file that
 contains all the available IPs on your machine. nzbspider will randomly
 pick one for every search query. If not specified, the OS default is 
 used.
@@ -68,6 +70,9 @@ Note that these searches are run against your own database, not directly
 against the NZB indexing sites! You'll still need a list of valid 
 release names pre-filled in your database.
 
+Using `--limit` you can override the maximum number of matched results.
+By default, only the 250 newest results are selected.
+
 ## Notes
 
 The script will assume that all releasenames in your database are safe
diff --git a/main.py b/main.py
index f9f78e9..939bcc4 100644
--- a/main.py
+++ b/main.py
@@ -9,6 +9,7 @@ parser.add_argument("--config", dest="config", action="store", help="Use a confi
 parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
 parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
 parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
+parser.add_argument("--limit", dest="limit", action="store", help="How many records to select in configuration file mode, at most (default: 250)", default=250)
 args = parser.parse_args()
 
 if args.config is not None:
@@ -64,10 +65,12 @@ if mode == "config":
 			fields.append("`section` LIKE ?")
 			values.append("%" + section + "%")
 		
+		values.append(args.limit)
+		
 		if len(fields) == 0:
-			db_query = "SELECT `release` FROM %s WHERE `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT 250" % conf['db']['table']
+			db_query = "SELECT `release` FROM %s WHERE `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT ?" % conf['db']['table']
 		else:
-			db_query = "SELECT `release` FROM %s WHERE %s AND `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT 250" % (conf['db']['table'], " AND ".join(fields))
+			db_query = "SELECT `release` FROM %s WHERE %s AND `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT ?" % (conf['db']['table'], " AND ".join(fields))
 		
 		c.execute(db_query, values)
 		

From 974d28973dd20232e33cba058e05afbeaf91a5d7 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 30 Jun 2013 21:17:37 +0200
Subject: [PATCH 17/17] Allow for a list of to-be-skipped release names

---
 README.md |  4 ++++
 main.py   | 12 ++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/README.md b/README.md
index 99fdd87..64b7f87 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,10 @@ contains all the available IPs on your machine. nzbspider will randomly
 pick one for every search query. If not specified, the OS default is 
 used.
 
+Using `--skip` you can specify a newline-delimited file that contains
+all release names that should be skipped, no matter what. This works in
+both modes.
+
 ### Release list
 
 This is a text file, specified with the `--list` parameter, that
diff --git a/main.py b/main.py
index 939bcc4..92631fc 100644
--- a/main.py
+++ b/main.py
@@ -10,6 +10,7 @@ parser.add_argument("--list", dest="list", action="store", help="Use a newline-d
 parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
 parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
 parser.add_argument("--limit", dest="limit", action="store", help="How many records to select in configuration file mode, at most (default: 250)", default=250)
+parser.add_argument("--skip", dest="skip", action="store", help="Optionally, a path to a newline-delimited list of release names to always skip")
 args = parser.parse_args()
 
 if args.config is not None:
@@ -25,6 +26,12 @@ if args.iplist is not None:
 	iplist = iplist_file.read().splitlines()
 else:
 	iplist = [""]
+	
+if args.skip is not None:
+	skip_file = open(args.skip, "r")
+	skiplist = skip_file.read().splitlines()
+else:
+	skiplist = [""]
 
 if mode == "config":
 	try:
@@ -112,6 +119,11 @@ for release in releases:
 		# This NZB couldn't be found before
 		notfound += 1
 		continue
+		
+	if release_name in skiplist:
+		# This release should be skipped
+		skipped += 1
+		continue
 	
 	try:
 		os.makedirs(target_dir)