Code so far

master
Sven Slootweg 11 years ago
parent d502f11151
commit 60825fd9fe

2
.gitignore vendored

@ -0,0 +1,2 @@
*.pyc
config.json

@ -1,4 +1,102 @@
import re, oursql, requests import re, oursql, requests, sys, json, shlex, argparse
from sources.nzbindex import NzbindexSpider from sources.nzbindex import NzbindexSpider
from sources.binsearch import BinsearchSpider from sources.binsearch import BinsearchSpider
from shared import NotFoundException
# Entry script: build a list of (release name, target directory) pairs, either
# from database queries described in a search configuration file ("config"
# mode) or from a plain newline-delimited list ("list" mode), then look every
# release up on the NZB indexers and print the hits.

parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
# --iplist gets its own destination. It used to share dest="list" with --list,
# so passing an IP list switched the script into list mode and made it read
# the IP addresses as release names (and either flag could clobber the other).
parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
args = parser.parse_args()

# --config wins when both sources are given.
if args.config is not None:
    mode = "config"
elif args.list is not None:
    mode = "list"
else:
    sys.stderr.write("You must specify either a configuration file or a release list.\n")
    sys.exit(1)

if mode == "config":
    # config.json holds the database settings; the file passed via --config
    # holds the search queries.
    try:
        with open("config.json", "r") as conf_file:
            conf = json.load(conf_file)
    except (IOError, ValueError):
        # ValueError covers a config.json that exists but is not valid JSON.
        sys.stderr.write("You must have a valid config.json.\n")
        sys.exit(1)

    # The table name is interpolated into the SQL text (identifiers cannot be
    # bound as parameters), so it is restricted to a safe character set first.
    if not re.match("^[a-zA-Z0-9_-]+$", conf['db']['table']):
        sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
        sys.exit(1)

    try:
        searchconf_file = open(args.config, "r")
    except IOError:
        sys.stderr.write("The specified configuration file doesn't exist.\n")
        sys.exit(1)
    with searchconf_file:
        queries = searchconf_file.read().splitlines()

    db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
    c = db.cursor()
    releases = []

    for query in queries:
        if not query.strip():
            # A blank (e.g. trailing) line has no fields to unpack; skip it.
            continue

        # Each line is "<title> <section> <target>", shell-style quoted;
        # "-" in the title or section position means "do not filter on this".
        title, section, target = shlex.split(query)

        fields = []
        values = []
        if title != "-":
            fields.append("`release` LIKE ?")
            values.append("%" + title + "%")
        if section != "-":
            fields.append("`section` LIKE ?")
            values.append("%" + section + "%")
        # Always applied: the row's `time` must be more than 86400 seconds
        # before the database's current time.
        fields.append("`time` < (UNIX_TIMESTAMP(NOW()) - 86400)")

        # The table name is backtick-quoted because the validation above
        # allows "-", which is not legal in an unquoted identifier; the
        # validation also guarantees the name contains no backtick.
        db_query = "SELECT `release` FROM `%s` WHERE %s" % (conf['db']['table'], " AND ".join(fields))
        c.execute(db_query, values)
        releases.extend((row[0], target) for row in c)
elif mode == "list":
    if args.target is None:
        sys.stderr.write("You did not specify a target directory with --target.\n")
        sys.exit(1)

    try:
        list_file = open(args.list, "r")
    except IOError:
        sys.stderr.write("The specified list file doesn't exist.\n")
        sys.exit(1)
    with list_file:
        # Blank lines are dropped so they are not searched as empty names.
        releases = [(release, args.target) for release in list_file.read().splitlines() if release.strip()]

sys.stdout.write("Found %d releases.\n" % len(releases))

# target_dir is unpacked but not used yet: results are only printed.
for release_name, target_dir in releases:
    try:
        spider = NzbindexSpider()
        results = spider.find(release_name)
    except NotFoundException:
        # Fall back to the second indexer before giving up on this release.
        try:
            spider = BinsearchSpider()
            results = spider.find(release_name)
        except NotFoundException:
            sys.stderr.write("Could not find release %s\n" % release_name)
            continue

    # Process result
    for result in results:
        result.show()

@ -0,0 +1,31 @@
import requests, random
# Pool of browser user-agent strings. ModifiedSession picks one of them at
# random (random.choice) and sets it as the session's 'user-agent' header.
# These are just some arbitrary user agents; you can replace them with a
# different list.
user_agents = [
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
"Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
]
class NotFoundException(Exception):
    """Raised by a spider's find() when a search yields no usable result."""
class ModifiedSession(requests.Session):
    """A requests.Session that sends a randomly chosen browser user-agent."""

    def __init__(self, *args, **kwargs):
        requests.Session.__init__(self, *args, **kwargs)
        # Chosen once per session, so all requests of one session agree.
        self.headers['user-agent'] = random.choice(user_agents)

    def download_file(self, request, target):
        """Save a response body to ``target``; see the module-level download_file().

        Kept as a method for callers that go through the session object.
        """
        download_file(request, target)


def download_file(request, target):
    """Write the body of a response to the file ``target``.

    ``request`` is the response object returned by a session ``get``/``post``
    call (the parameter name is kept for backward compatibility). Nothing is
    written, and no error is raised, unless the status code is 200.

    This module-level form is what ``from shared import download_file``
    resolves to; the nzbindex spider imports it that way and calls it with
    exactly ``(response, target_path)``.
    """
    if request.status_code != 200:
        return

    # `with` closes the file even if a write fails part-way through.
    with open(target, "wb") as f:
        # iter_content() defaults to 1-byte chunks; ask for a sensible block
        # size so the file is not written one byte at a time.
        for chunk in request.iter_content(chunk_size=8192):
            f.write(chunk)

@ -1,2 +1 @@
class NzbindexSpider(object):
    """Placeholder for the nzbindex spider; defines no behaviour of its own."""

@ -1,2 +1,7 @@
from shared import NotFoundException
class BinsearchSpider(object):
    """Placeholder for the binsearch spider; no search logic is defined yet."""
class BinsearchResult(object):
    """Placeholder for a single binsearch result; carries no data or methods yet."""

@ -1 +1,63 @@
from shared import NotFoundException, ModifiedSession, download_file
import requests, re, HTMLParser
class NzbindexSpider(object):
    """Search nzbindex.com for a release and collect the matching NZB links."""

    # Compiled once for the class instead of on every row of every search.
    # Raw strings keep the backslashes out of Python's own escape handling.
    _ROW_PATTERN = re.compile(r"<tr[^>]+>(.*?)<\/tr>", re.DOTALL)
    _LABEL_PATTERN = re.compile(r"<label[^>]+>(.*?)<\/label>", re.DOTALL)
    _TAG_PATTERN = re.compile(r"<[^>]+>")
    _DOWNLOAD_PATTERN = re.compile(r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb')

    def find(self, name):
        """Return a list of NzbindexResult objects whose title contains ``name``.

        The comparison is a case-insensitive substring match on the title
        text of each result row. A new ModifiedSession is created on every
        call and stored as ``self.session``, so that the returned results can
        reuse it for downloading.

        Raises NotFoundException when no row yields a usable result.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession()

        # NOTE(review): verify=False turns off TLS certificate verification
        # for both requests below. Kept as in the original; confirm whether
        # it is still required.
        self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)
        response = self.session.get("https://nzbindex.com/search/", params={
            "q": name,
            "age": "",
            "max": "50",
            "minage": "",
            "sort": "agedesc",
            "minsize": "100",
            "maxsize": "",
            "dq": "",
            "poster": "",
            "nfo": "",
            "hasnfo": "1",
            "complete": "1",
            "hidespam": "1",
            "more": "1",
        }, verify=False)

        wanted = name.lower()
        search_results = []

        for row in self._ROW_PATTERN.findall(response.text):
            if 'class="threat"' in row:
                # Password protected or otherwise unsuitable for download
                continue

            label = self._LABEL_PATTERN.search(row)
            if label is None:
                continue

            # Strip nested markup from the label, then decode HTML entities.
            title = parser.unescape(self._TAG_PATTERN.sub("", label.group(1)))
            if wanted not in title.lower():
                continue

            link = self._DOWNLOAD_PATTERN.search(row)
            if link is not None:
                # NzbindexResult requires the spider as its third argument
                # (it downloads through spider.session); leaving it out
                # raised TypeError on the first matching row.
                search_results.append(NzbindexResult(title, link.group(0), self))

        if not search_results:
            raise NotFoundException("No results were found.")

        return search_results
class NzbindexResult(object):
    """One nzbindex search hit: its title, its NZB URL and the spider that found it."""

    def __init__(self, title, url, spider):
        self.title, self.url, self.spider = title, url, spider

    def show(self):
        """Print the result as ``<title> -> <url>``."""
        # Parenthesised single argument: prints identically under the print
        # statement and the print function.
        print("%s -> %s" % (self.title, self.url))

    def download(self, target_path):
        """Fetch the NZB through the spider's session and save it to ``target_path``."""
        response = self.spider.session.get(self.url)
        download_file(response, target_path)

Loading…
Cancel
Save