Release 1.0
commit
4786ea0443
@ -0,0 +1,2 @@
|
|||||||
|
*.pyc
|
||||||
|
config.json
|
@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"db": {
|
||||||
|
"host": "localhost",
|
||||||
|
"user": "nzbspider",
|
||||||
|
"pass": "sekrit",
|
||||||
|
"db": "nzbspider",
|
||||||
|
"table": "releases"
|
||||||
|
}
|
||||||
|
}
|
@ -1,4 +1,160 @@
|
|||||||
import re, oursql, requests, sys, json, shlex, argparse, os, random

from sources.nzbindex import NzbindexSpider
from sources.binsearch import BinsearchSpider
from shared import NotFoundException

# Command-line interface: exactly one of --config (database mode) or --list
# (flat-file mode) must be given; the remaining options refine behaviour.
parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
parser.add_argument("--limit", dest="limit", action="store", type=int, help="How many records to select in configuration file mode, at most (default: 250)", default=250)
parser.add_argument("--skip", dest="skip", action="store", help="Optionally, a path to a newline-delimited list of release names to always skip")
args = parser.parse_args()

if args.config is not None:
    mode = "config"
elif args.list is not None:
    mode = "list"
else:
    sys.stderr.write("You must specify either a configuration file or a release list.\n")
    sys.exit(1)

# Optional list of source IPs to bind outgoing requests to.
# An empty string means "don't bind to a specific IP".
if args.iplist is not None:
    with open(args.iplist, "r") as iplist_file:
        iplist = iplist_file.read().splitlines()
else:
    iplist = [""]

# Optional list of release names that should never be downloaded.
if args.skip is not None:
    with open(args.skip, "r") as skip_file:
        skiplist = skip_file.read().splitlines()
else:
    skiplist = []

if mode == "config":
    try:
        with open("config.json", "r") as conf_file:
            conf = json.load(conf_file)
    except IOError:
        sys.stderr.write("You must have a valid config.json.\n")
        sys.exit(1)

    # The table name is interpolated into SQL below, so restrict it to a
    # safe character set to prevent injection via the config file.
    if not re.match("^[a-zA-Z0-9_-]+$", conf['db']['table']):
        sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
        sys.exit(1)

    try:
        searchconf_file = open(args.config, "r")
    except IOError:
        sys.stderr.write("The specified configuration file doesn't exist.\n")
        sys.exit(1)

    queries = searchconf_file.read().splitlines()
    searchconf_file.close()

    db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
    c = db.cursor()

    releases = []

    # Each query line is shell-quoted: "<title> <section> <target_dir>";
    # a literal "-" means "no filter" for title or section.
    for query in queries:
        title, section, target = shlex.split(query)

        fields = []
        values = []

        if title != "-":
            fields.append("`release` LIKE ?")
            values.append("%" + title + "%")

        if section != "-":
            fields.append("`section` LIKE ?")
            values.append("%" + section + "%")

        values.append(args.limit)

        # Only consider releases older than a day, presumably so posts have
        # had time to propagate before we look for their NZBs.
        if len(fields) == 0:
            db_query = "SELECT `release` FROM %s WHERE `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT ?" % conf['db']['table']
        else:
            db_query = "SELECT `release` FROM %s WHERE %s AND `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT ?" % (conf['db']['table'], " AND ".join(fields))

        c.execute(db_query, values)

        for row in c:
            releases.append((row[0], target))
elif mode == "list":
    if args.target is None:
        sys.stderr.write("You did not specify a target directory with --target.\n")
        sys.exit(1)

    try:
        list_file = open(args.list, "r")
    except IOError:
        sys.stderr.write("The specified list file doesn't exist.\n")
        sys.exit(1)

    releases = [(release, args.target) for release in list_file.read().splitlines()]
    list_file.close()

sys.stdout.write("Found %d releases.\n" % len(releases))

downloaded = 0
skipped = 0
errors = 0
notfound = 0

notfound_list = []

for release in releases:
    release_name, target_dir = release
    target_path = os.path.join(target_dir, "%s.nzb" % release_name)

    if os.path.exists(target_path):
        # This NZB was already downloaded.
        skipped += 1
        continue

    if release_name in notfound_list:
        # This NZB couldn't be found before
        notfound += 1
        continue

    if release_name in skiplist:
        # This release should be skipped
        skipped += 1
        continue

    try:
        os.makedirs(target_dir)
    except OSError:
        # Target directory already exists
        pass

    # Try nzbindex first, fall back to binsearch.
    try:
        spider = NzbindexSpider(random.choice(iplist))
        results = spider.find(release_name)
    except NotFoundException:
        try:
            spider = BinsearchSpider(random.choice(iplist))
            results = spider.find(release_name)
        except NotFoundException:
            sys.stderr.write("Could not find release %s\n" % release_name)
            notfound_list.append(release_name)
            notfound += 1
            continue

    # Process result
    result = results[0]

    try:
        result.download(target_path)
    except Exception as e:
        errors += 1
        sys.stderr.write("Downloading NZB for %s failed: %s\n" % (release_name, repr(e)))
        continue

    sys.stdout.write("Downloaded NZB for %s.\n" % release_name)
    downloaded += 1

sys.stdout.write("Finished. %d downloaded, %d skipped, %d errors and %d not found.\n" % (downloaded, skipped, errors, notfound))
|
@ -0,0 +1,74 @@
|
|||||||
|
import requests, random, socket
|
||||||
|
|
||||||
|
# These are just some random useragents, you can replace these with a different list
user_agents = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.63 Safari/537.36"
]


class NotFoundException(Exception):
    """Raised when a spider finds no usable results for a release."""
    pass


class DownloadException(Exception):
    """Raised when downloading an NZB fails (e.g. a non-200 response)."""
    pass


# Very nasty monkeypatching ahead!
# Save the pristine implementation exactly once, so that a re-import of this
# module can never clobber it with an already-patched function.
if not hasattr(socket, "real_create_connection"):
    socket.real_create_connection = socket.create_connection
|
||||||
|
|
||||||
|
class ModifiedSession(requests.Session):
    """A requests.Session that binds outgoing connections to a fixed source IP.

    Pass bound_ip="..." to the constructor; an empty string means "no binding".
    A random User-Agent from `user_agents` is picked once per session.
    """

    def __init__(self, *args, **kwargs):
        # pop() removes our extra keyword so it never reaches
        # requests.Session.__init__, and supplies the "" default in one step.
        self.bound_ip = kwargs.pop('bound_ip', "")

        requests.Session.__init__(self, *args, **kwargs)
        self.headers['User-Agent'] = random.choice(user_agents)

    def patch_socket(self):
        # Globally swap socket.create_connection for a version that binds to
        # our source IP; requests offers no cleaner hook at this level.
        socket.create_connection = get_patched_func(self.bound_ip)

    def unpatch_socket(self):
        socket.create_connection = socket.real_create_connection

    def get(self, *args, **kwargs):
        """GET with the socket patched; always restores the real socket."""
        self.patch_socket()
        try:
            return requests.Session.get(self, *args, **kwargs)
        finally:
            # Without the finally, a failed request would leave the global
            # monkeypatch in place for every other user of the process.
            self.unpatch_socket()

    def post(self, *args, **kwargs):
        """POST with the socket patched; always restores the real socket."""
        self.patch_socket()
        try:
            return requests.Session.post(self, *args, **kwargs)
        finally:
            self.unpatch_socket()
||||||
|
|
||||||
|
def get_patched_func(bind_addr):
    """Return a socket.create_connection replacement bound to `bind_addr`.

    The returned function forwards to the saved socket.real_create_connection
    but forces source_address to (bind_addr, 0), ignoring any caller-supplied
    source address.
    """
    def set_src_addr(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        # Mirror socket.create_connection's real signature so callers that
        # omit the timeout (or pass keywords) don't blow up.
        return socket.real_create_connection(address, timeout, (bind_addr, 0))

    return set_src_addr
|
||||||
|
|
||||||
|
# You're looking at duct tape and tie-wraps. It's like your local Home
|
||||||
|
# Depot, except in Python.
|
||||||
|
|
||||||
|
def download_file(request, target):
    """Stream the body of a completed requests response to the file `target`.

    Raises DownloadException when the response status is not 200. The file is
    always closed, even if writing fails part-way through.
    """
    if request.status_code == 200:
        # Stream in sane-sized chunks; the iter_content() default is tiny.
        with open(target, "wb") as f:
            for chunk in request.iter_content(4096):
                f.write(chunk)
    else:
        raise DownloadException("Status code was %s" % request.status_code)
|
@ -1,2 +1 @@
|
|||||||
class NzbindexSpider(object):
    """Placeholder spider; superseded by the full implementation."""
    pass
|
|
||||||
|
@ -1,2 +1,89 @@
|
|||||||
|
from shared import NotFoundException, ModifiedSession, download_file
|
||||||
|
import requests, re, HTMLParser
|
||||||
|
|
||||||
class BinsearchSpider(object):
    """Scrapes binsearch.info search results for NZBs matching a release name."""

    def __init__(self, bound_ip):
        # Source IP to bind outgoing requests to ("" = no binding).
        self.bound_ip = bound_ip

    def find(self, name):
        """Search binsearch for `name` and return a list of BinsearchResult.

        Raises NotFoundException when no usable result matches.
        """
        unescaper = HTMLParser.HTMLParser()
        self.session = ModifiedSession(bound_ip=self.bound_ip)

        response = self.session.get("https://binsearch.info/index.php", params={
            "q": name,
            "m": "",
            "adv_age": "600",
            "max": "100",
            "adv_g": "",
            "adv_sort": "date",
            "minsize": "100",
            "maxsize": "",
            "adv_col": "on",
            "adv_nfo": "on",
            "font": "",
            "postdate": "",
            "server": ""
        }, verify=False)

        search_results = []

        # Nice try, corrupting your HTML to deter scrapers. Not going to stop me, though.
        for row in re.findall(r'<tr[^>]+>(.*?)<a href="browse\.php', response.text, re.DOTALL):
            if 'requires password' in row:
                # Password protected
                continue

            title_match = re.search(r'<span[^>]*class="s"[^>]*>(.*?)<\/span>', row, re.DOTALL)
            if title_match is None:
                continue

            # Strip tags and unescape HTML entities to recover the plain title.
            title = unescaper.unescape(re.sub("<[^>]+>", "", title_match.group(1)))

            if name.lower() in title.lower():
                # The checkbox name is the numeric post id needed for download.
                id_match = re.search(r'<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>', row)
                if id_match is not None:
                    search_results.append(BinsearchResult(name, title, id_match.group(1), self, response.url))

        if len(search_results) == 0:
            raise NotFoundException("No results were found.")

        return search_results
|
||||||
|
|
||||||
|
class BinsearchResult(object):
    """A single binsearch search hit; knows how to fetch its NZB."""

    def __init__(self, name, title, id_, spider, searchurl):
        # name: the original search term; title: the matched post title;
        # id_: binsearch's numeric checkbox id for the post;
        # spider: the owning spider (supplies the HTTP session);
        # searchurl: URL of the search page, later sent as Referer.
        self.name = name
        self.title = title
        self.id_ = id_
        self.spider = spider
        self.searchurl = searchurl

    def show(self):
        # Single parenthesised argument: same output on Python 2,
        # and also valid syntax on Python 3.
        print("%s -> %s (%s)" % (self.title, self.id_, self.name))

    def download(self, target_path):
        """Request the NZB for this post and save it to target_path."""
        data_dict = {"action": "nzb"}
        data_dict[self.id_] = "on"

        # binsearch checks the Referer header on NZB downloads.
        self.spider.session.headers['Referer'] = self.searchurl

        response = self.spider.session.post("https://www.binsearch.info/fcgi/nzb.fcgi", params={
            "q": self.name,
            "m": "",
            "adv_age": "600",
            "max": "100",
            "adv_g": "",
            "adv_sort": "date",
            "minsize": "100",
            "maxsize": "",
            "adv_col": "on",
            "adv_nfo": "on",
            "font": "",
            "postdate": "",
            "server": ""
        }, data=data_dict)

        download_file(response, target_path)
|
||||||
|
@ -1 +1,67 @@
|
|||||||
|
from shared import NotFoundException, ModifiedSession, download_file
|
||||||
|
import requests, re, HTMLParser
|
||||||
|
|
||||||
|
class NzbindexSpider(object):
    """Scrapes nzbindex.com search results for NZBs matching a release name."""

    def __init__(self, bound_ip):
        # Source IP to bind outgoing requests to ("" = no binding).
        self.bound_ip = bound_ip

    def find(self, name):
        """Search nzbindex for `name` and return a list of NzbindexResult.

        Raises NotFoundException when no usable result matches.
        """
        unescaper = HTMLParser.HTMLParser()
        self.session = ModifiedSession(bound_ip=self.bound_ip)
        # nzbindex requires agreeing to its disclaimer before searching.
        self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)

        response = self.session.get("https://nzbindex.com/search/", params={
            "q": name,
            "age": "",
            "max": "50",
            "minage": "",
            "sort": "agedesc",
            "minsize": "100",
            "maxsize": "",
            "dq": "",
            "poster": "",
            "nfo": "",
            "hasnfo": "1",
            "complete": "1",
            "hidespam": "1",
            "more": "1"
        }, verify=False)

        search_results = []

        for row in re.findall(r"<tr[^>]*>(.*?)<\/tr>", response.text, re.DOTALL):
            if 'class="threat"' in row:
                # Password protected or otherwise unsuitable for download
                continue

            label_match = re.search(r"<label[^>]*>(.*?)<\/label>", row, re.DOTALL)
            if label_match is None:
                continue

            # Strip tags and unescape HTML entities to recover the plain title.
            title = unescaper.unescape(re.sub("<[^>]*>", "", label_match.group(1)))

            if name.lower() in title.lower():
                url_match = re.search(r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb', row)
                if url_match is not None:
                    search_results.append(NzbindexResult(title, url_match.group(0), self))

        if len(search_results) == 0:
            raise NotFoundException("No results were found.")

        return search_results
|
||||||
|
|
||||||
|
class NzbindexResult(object):
    """A single nzbindex search hit; knows how to fetch its NZB."""

    def __init__(self, title, url, spider):
        # title: the matched post title; url: direct .nzb download URL;
        # spider: the owning spider (supplies the HTTP session).
        self.title = title
        self.url = url
        self.spider = spider

    def show(self):
        # Single parenthesised argument: same output on Python 2,
        # and also valid syntax on Python 3.
        print("%s -> %s" % (self.title, self.url))

    def download(self, target_path):
        """Download this result's NZB file to target_path."""
        download_file(self.spider.session.get(self.url), target_path)
|
||||||
|
@ -0,0 +1,11 @@
|
|||||||
|
SET SQL_MODE="NO_AUTO_VALUE_ON_ZERO";
SET time_zone = "+00:00";

-- One row per unique release name spotted by the spider.
CREATE TABLE IF NOT EXISTS `releases` (
  `releaseid` int(11) NOT NULL AUTO_INCREMENT,
  -- UNIX timestamp; the downloader only selects rows older than 24h
  -- (compared against UNIX_TIMESTAMP(NOW()) - 86400).
  `time` int(11) NOT NULL,
  -- NOTE(review): presumably the usenet section/category — confirm with the writer.
  `section` varchar(50) COLLATE utf8_unicode_ci NOT NULL,
  `release` varchar(255) COLLATE utf8_unicode_ci NOT NULL,
  PRIMARY KEY (`releaseid`),
  -- Duplicate release names are rejected at insert time.
  UNIQUE KEY `release` (`release`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
|
Loading…
Reference in New Issue