Release 1.0
commit
4786ea0443
@ -0,0 +1,2 @@
|
||||
*.pyc
|
||||
config.json
|
@ -0,0 +1,9 @@
|
||||
{
|
||||
"db": {
|
||||
"host": "localhost",
|
||||
"user": "nzbspider",
|
||||
"pass": "sekrit",
|
||||
"db": "nzbspider",
|
||||
"table": "releases"
|
||||
}
|
||||
}
|
@ -1,4 +1,160 @@
|
||||
# nzbspider: find and download NZB files for a list of releases, either
# pulled from a MySQL database (config mode) or from a plain text file
# (list mode). NZBs are searched on nzbindex first, then binsearch.

import re, oursql, requests, sys, json, shlex, argparse, os, random

from sources.nzbindex import NzbindexSpider
from sources.binsearch import BinsearchSpider
from shared import NotFoundException

parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
parser.add_argument("--limit", dest="limit", action="store", type=int, help="How many records to select in configuration file mode, at most (default: 250)", default=250)
parser.add_argument("--skip", dest="skip", action="store", help="Optionally, a path to a newline-delimited list of release names to always skip")
args = parser.parse_args()

# Exactly one source is required; --config wins when both are given.
if args.config is not None:
    mode = "config"
elif args.list is not None:
    mode = "list"
else:
    sys.stderr.write("You must specify either a configuration file or a release list.\n")
    sys.exit(1)

# Pool of source IPs to bind outgoing requests to; a single empty string
# means "don't bind a specific source address".
if args.iplist is not None:
    with open(args.iplist, "r") as iplist_file:
        iplist = iplist_file.read().splitlines()
else:
    iplist = [""]

if args.skip is not None:
    with open(args.skip, "r") as skip_file:
        skiplist = skip_file.read().splitlines()
else:
    skiplist = [""]

if mode == "config":
    try:
        config_file = open("config.json", "r")
    except IOError as e:
        sys.stderr.write("You must have a valid config.json.\n")
        sys.exit(1)

    with config_file:
        conf = json.load(config_file)

    # The table name is interpolated into the query string below, so it is
    # strictly whitelisted here; all values go through ? placeholders.
    if not re.match("^[a-zA-Z0-9_-]+$", conf['db']['table']):
        sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
        sys.exit(1)

    try:
        searchconf_file = open(args.config, "r")
    except IOError as e:
        sys.stderr.write("The specified configuration file doesn't exist.\n")
        sys.exit(1)

    with searchconf_file:
        queries = searchconf_file.read().splitlines()

    db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
    c = db.cursor()

    releases = []

    for query in queries:
        # Each line: <title> <section> <target dir>, shell-style quoting.
        # A literal "-" means "no filter" for that field.
        title, section, target = shlex.split(query)

        fields = []
        values = []

        if title != "-":
            fields.append("`release` LIKE ?")
            values.append("%" + title + "%")

        if section != "-":
            fields.append("`section` LIKE ?")
            values.append("%" + section + "%")

        values.append(args.limit)

        # Only consider releases older than a day, newest first.
        if len(fields) == 0:
            db_query = "SELECT `release` FROM %s WHERE `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT ?" % conf['db']['table']
        else:
            db_query = "SELECT `release` FROM %s WHERE %s AND `time` < (UNIX_TIMESTAMP(NOW()) - 86400) ORDER BY `time` DESC LIMIT ?" % (conf['db']['table'], " AND ".join(fields))

        c.execute(db_query, values)

        for row in c:
            releases.append((row[0], target))
elif mode == "list":
    if args.target is None:
        sys.stderr.write("You did not specify a target directory with --target.\n")
        sys.exit(1)

    try:
        list_file = open(args.list, "r")
    except IOError as e:
        sys.stderr.write("The specified list file doesn't exist.\n")
        sys.exit(1)

    with list_file:
        releases = [(release, args.target) for release in list_file.read().splitlines()]

sys.stdout.write("Found %d releases.\n" % len(releases))

downloaded = 0
skipped = 0
errors = 0
notfound = 0

# Release names that already failed both spiders this run; avoids
# re-searching duplicates.
notfound_list = []

for release in releases:
    release_name, target_dir = release
    target_path = os.path.join(target_dir, "%s.nzb" % release_name)

    if os.path.exists(target_path):
        # This NZB was already downloaded.
        skipped += 1
        continue

    if release_name in notfound_list:
        # This NZB couldn't be found before
        notfound += 1
        continue

    if release_name in skiplist:
        # This release should be skipped
        skipped += 1
        continue

    try:
        os.makedirs(target_dir)
    except OSError as e:
        # Target directory already exists
        pass

    # Try nzbindex first, fall back to binsearch.
    try:
        spider = NzbindexSpider(random.choice(iplist))
        results = spider.find(release_name)
    except NotFoundException as e:
        try:
            spider = BinsearchSpider(random.choice(iplist))
            results = spider.find(release_name)
        except NotFoundException as e:
            sys.stderr.write("Could not find release %s\n" % release_name)
            notfound_list.append(release_name)
            notfound += 1
            continue

    # Process result
    result = results[0]

    try:
        result.download(target_path)
    except Exception as e:
        errors += 1
        sys.stderr.write("Downloading NZB for %s failed: %s\n" % (release_name, repr(e)))
        continue

    sys.stdout.write("Downloaded NZB for %s.\n" % release_name)
    downloaded += 1

sys.stdout.write("Finished. %d downloaded, %d skipped, %d errors and %d not found.\n" % (downloaded, skipped, errors, notfound))
@ -0,0 +1,74 @@
|
||||
import requests, random, socket
|
||||
|
||||
# These are just some random useragents, you can replace these with a different list.
# One is picked at random per ModifiedSession so scrapes don't all share a UA.
user_agents = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.63 Safari/537.36",
]
|
||||
|
||||
class NotFoundException(Exception):
    """Raised by a spider when a search yields no usable results."""


class DownloadException(Exception):
    """Raised when fetching an NZB returns a non-200 HTTP status."""
|
||||
|
||||
# Very nasty monkeypatching ahead!
# Save the pristine socket.create_connection so ModifiedSession can restore
# it after each request. Guarded so that re-importing this module can never
# overwrite the saved original with an already-patched function.
if not hasattr(socket, "real_create_connection"):
    socket.real_create_connection = socket.create_connection
|
||||
|
||||
class ModifiedSession(requests.Session):
    """A requests.Session that binds outgoing connections to a fixed source IP.

    Accepts an extra keyword argument, bound_ip (empty string = don't bind),
    and picks a random User-Agent per session. Binding is implemented by
    temporarily monkeypatching socket.create_connection around each request.
    """

    def __init__(self, *args, **kwargs):
        # Pull our extra keyword out before requests.Session sees it.
        self.bound_ip = kwargs.pop('bound_ip', "")

        requests.Session.__init__(self, *args, **kwargs)
        self.headers['User-Agent'] = random.choice(user_agents)

    def patch_socket(self):
        # Force all new connections to originate from our bound IP.
        socket.create_connection = get_patched_func(self.bound_ip)

    def unpatch_socket(self):
        socket.create_connection = socket.real_create_connection

    def get(self, *args, **kwargs):
        # try/finally so the global patch is undone even if the request raises.
        self.patch_socket()
        try:
            return requests.Session.get(self, *args, **kwargs)
        finally:
            self.unpatch_socket()

    def post(self, *args, **kwargs):
        self.patch_socket()
        try:
            return requests.Session.post(self, *args, **kwargs)
        finally:
            self.unpatch_socket()
|
||||
|
||||
def get_patched_func(bind_addr):
    """Build a drop-in replacement for socket.create_connection that forces
    every outgoing connection to use *bind_addr* as its source address
    (port 0 = let the OS pick)."""
    def set_src_addr(*args):
        target, conn_timeout = args[0], args[1]
        # Delegate to the saved original with the source address pinned.
        return socket.real_create_connection(target, conn_timeout, (bind_addr, 0))
    return set_src_addr
|
||||
|
||||
# You're looking at duct tape and tie-wraps. It's like your local Home
|
||||
# Depot, except in Python.
|
||||
|
||||
def download_file(request, target):
    """Stream a completed HTTP response to disk.

    request -- a requests response object
    target  -- filesystem path to write the body to

    Raises DownloadException when the HTTP status is anything but 200.
    """
    if request.status_code == 200:
        # iter_content() with no arguments yields ONE byte per chunk, which
        # is pathologically slow; use a real chunk size. The with-block
        # guarantees the file handle is closed even if a write fails.
        with open(target, "wb") as f:
            for chunk in request.iter_content(8192):
                f.write(chunk)
    else:
        raise DownloadException("Status code was %s" % request.status_code)
|
@ -1,2 +1 @@
|
||||
class NzbindexSpider(object):
    """Empty placeholder; the real spider lives in sources/nzbindex.py."""
@ -1,2 +1,89 @@
|
||||
from shared import NotFoundException, ModifiedSession, download_file
|
||||
import requests, re, HTMLParser
|
||||
|
||||
class BinsearchSpider(object):
    """Searches binsearch.info for a release and returns downloadable results."""

    def __init__(self, bound_ip):
        # Source IP to bind outgoing requests to ("" = don't bind).
        self.bound_ip = bound_ip

    def find(self, name):
        """Search binsearch for *name*.

        Returns a list of BinsearchResult objects whose titles contain the
        search term (case-insensitive). Raises NotFoundException when no
        usable result matches.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession(bound_ip=self.bound_ip)

        response = self.session.get("https://binsearch.info/index.php", params={
            "q": name,
            "m": "",
            "adv_age": "600",
            "max": "100",
            "adv_g": "",
            "adv_sort": "date",
            "minsize": "100",
            "maxsize": "",
            "adv_col": "on",
            "adv_nfo": "on",
            "font": "",
            "postdate": "",
            "server": ""
        }, verify=False)

        search_results = []

        # Nice try, corrupting your HTML to deter scrapers. Not going to stop me, though.
        # (Result rows are not well-formed, so match up to the browse link
        # instead of a closing </tr>.)
        results = re.findall(r'<tr[^>]+>(.*?)<a href="browse\.php', response.text, re.DOTALL)

        for result in results:
            if 'requires password' in result:
                # Password protected
                continue

            match = re.search(r'<span[^>]*class="s"[^>]*>(.*?)<\/span>', result, re.DOTALL)

            if match is None:
                continue

            # Strip markup and decode HTML entities to get the plain title.
            title = parser.unescape(re.sub("<[^>]+>", "", match.group(1)))

            if name.lower() in title.lower():
                # The checkbox name is the numeric post id needed to build
                # the NZB download request.
                match = re.search(r'<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>', result)

                if match is not None:
                    search_results.append(BinsearchResult(name, title, match.group(1), self, response.url))

        if len(search_results) == 0:
            raise NotFoundException("No results were found.")

        return search_results
|
||||
|
||||
class BinsearchResult(object):
    """A single binsearch search hit that can be downloaded as an NZB."""

    def __init__(self, name, title, id_, spider, searchurl):
        self.name = name            # original search term
        self.title = title          # title as displayed on binsearch
        self.id_ = id_              # numeric post id submitted to nzb.fcgi
        self.spider = spider        # owning BinsearchSpider (for its session)
        self.searchurl = searchurl  # search results URL, used as Referer

    def show(self):
        """Print a one-line human-readable summary of this result."""
        # Parenthesized form so this works under both Python 2 and 3.
        print("%s -> %s (%s)" % (self.title, self.id_, self.name))

    def download(self, target_path):
        """Download the NZB for this result to *target_path*."""
        data_dict = {"action": "nzb"}
        data_dict[self.id_] = "on"

        # binsearch checks the Referer before serving the NZB.
        self.spider.session.headers['Referer'] = self.searchurl

        response = self.spider.session.post("https://www.binsearch.info/fcgi/nzb.fcgi", params={
            "q": self.name,
            "m": "",
            "adv_age": "600",
            "max": "100",
            "adv_g": "",
            "adv_sort": "date",
            "minsize": "100",
            "maxsize": "",
            "adv_col": "on",
            "adv_nfo": "on",
            "font": "",
            "postdate": "",
            "server": ""
        }, data=data_dict)

        download_file(response, target_path)
|
||||
|
@ -1 +1,67 @@
|
||||
from shared import NotFoundException, ModifiedSession, download_file
|
||||
import requests, re, HTMLParser
|
||||
|
||||
class NzbindexSpider(object):
    """Searches nzbindex.com for a release and returns downloadable results."""

    def __init__(self, bound_ip):
        # Source IP to bind outgoing requests to ("" = don't bind).
        self.bound_ip = bound_ip

    def find(self, name):
        """Search nzbindex for *name*.

        Returns a list of NzbindexResult objects whose titles contain the
        search term (case-insensitive). Raises NotFoundException when no
        usable result matches.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession(bound_ip=self.bound_ip)
        # nzbindex gates searches behind a disclaimer; agree once per session.
        self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)

        response = self.session.get("https://nzbindex.com/search/", params={
            "q": name,
            "age": "",
            "max": "50",
            "minage": "",
            "sort": "agedesc",
            "minsize": "100",
            "maxsize": "",
            "dq": "",
            "poster": "",
            "nfo": "",
            "hasnfo": "1",
            "complete": "1",
            "hidespam": "1",
            "more": "1"
        }, verify=False)

        search_results = []

        results = re.findall(r"<tr[^>]*>(.*?)<\/tr>", response.text, re.DOTALL)

        for result in results:
            if 'class="threat"' in result:
                # Password protected or otherwise unsuitable for download
                continue

            match = re.search(r"<label[^>]*>(.*?)<\/label>", result, re.DOTALL)

            if match is None:
                continue

            # Strip markup and decode HTML entities to get the plain title.
            title = parser.unescape(re.sub("<[^>]*>", "", match.group(1)))

            if name.lower() in title.lower():
                match = re.search(r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb', result)

                if match is not None:
                    search_results.append(NzbindexResult(title, match.group(0), self))

        if len(search_results) == 0:
            raise NotFoundException("No results were found.")

        return search_results
|
||||
|
||||
class NzbindexResult(object):
    """A single nzbindex search hit that can be downloaded as an NZB."""

    def __init__(self, title, url, spider):
        self.title = title    # title as displayed on nzbindex
        self.url = url        # direct .nzb download URL
        self.spider = spider  # owning NzbindexSpider (for its session)

    def show(self):
        """Print a one-line human-readable summary of this result."""
        # Parenthesized form so this works under both Python 2 and 3.
        print("%s -> %s" % (self.title, self.url))

    def download(self, target_path):
        """Download the NZB for this result to *target_path*."""
        download_file(self.spider.session.get(self.url), target_path)
|
||||
|
@ -0,0 +1,11 @@
|
||||
-- Schema for the release index consumed by nzbspider in config mode.
SET SQL_MODE="NO_AUTO_VALUE_ON_ZERO";
SET time_zone = "+00:00";

-- One row per release; release names are globally unique.
CREATE TABLE IF NOT EXISTS `releases` (
  `releaseid` int(11) NOT NULL AUTO_INCREMENT,
  `time` int(11) NOT NULL,
  `section` varchar(50) COLLATE utf8_unicode_ci NOT NULL,
  `release` varchar(255) COLLATE utf8_unicode_ci NOT NULL,
  PRIMARY KEY (`releaseid`),
  UNIQUE KEY `release` (`release`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
|
Loading…
Reference in New Issue