Code so far

master
Sven Slootweg 11 years ago
parent d502f11151
commit 60825fd9fe

2
.gitignore vendored

@ -0,0 +1,2 @@
*.pyc
config.json

@ -1,4 +1,102 @@
import re, oursql, requests
import re, oursql, requests, sys, json, shlex, argparse
from sources.nzbindex import NzbindexSpider
from sources.binsearch import BinsearchSpider
from shared import NotFoundException
# Command-line interface. Exactly one release source is required: either a
# search configuration file (--config) or a plain list of releases (--list).
parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
# --iplist gets its own destination. It previously shared dest="list" with
# --list, so passing an IP list silently replaced the release list.
parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
args = parser.parse_args()

# --config wins when both sources are given.
if args.config is not None:
    mode = "config"
elif args.list is not None:
    mode = "list"
else:
    sys.stderr.write("You must specify either a configuration file or a release list.\n")
    sys.exit(1)
# Build `releases`, a list of (release name, target directory) tuples, from
# whichever source was selected on the command line.
if mode == "config":
    # Database settings are always read from config.json in the working
    # directory; --config points at the search configuration instead.
    try:
        with open("config.json", "r") as conf_file:
            conf = json.load(conf_file)
    except (IOError, ValueError):
        # ValueError covers a config.json that exists but is not valid JSON.
        sys.stderr.write("You must have a valid config.json.\n")
        sys.exit(1)

    # The table name is interpolated into the SQL text below (it cannot be a
    # bound parameter), so it is restricted to a safe character set first.
    if not re.match("^[a-zA-Z0-9_-]+$", conf['db']['table']):
        sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
        sys.exit(1)

    try:
        searchconf_file = open(args.config, "r")
    except IOError:
        sys.stderr.write("The specified configuration file doesn't exist.\n")
        sys.exit(1)

    with searchconf_file:
        queries = searchconf_file.read().splitlines()

    db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
    c = db.cursor()

    # Backtick-quote the identifier so permitted names containing "-" are
    # valid SQL; the check above guarantees the name holds no backtick.
    table = "`%s`" % conf['db']['table']

    releases = []

    for query in queries:
        if not query.strip():
            # Blank lines (e.g. a trailing newline) carry no query.
            continue

        # Each line is "title section target", shell-style quoted; "-" for
        # title or section means "do not filter on this column".
        try:
            title, section, target = shlex.split(query)
        except ValueError:
            # Raised for unbalanced quotes and for a wrong number of fields.
            sys.stderr.write("Invalid line in configuration file (expected: title section target): %s\n" % query)
            sys.exit(1)

        conditions = []
        values = []

        if title != "-":
            conditions.append("`release` LIKE ?")
            values.append("%" + title + "%")

        if section != "-":
            conditions.append("`section` LIKE ?")
            values.append("%" + section + "%")

        # Always applied: only rows whose `time` lies more than 86400 seconds
        # (one day) in the past.
        conditions.append("`time` < (UNIX_TIMESTAMP(NOW()) - 86400)")

        db_query = "SELECT `release` FROM %s WHERE %s" % (table, " AND ".join(conditions))
        c.execute(db_query, values)

        releases.extend((row[0], target) for row in c)
elif mode == "list":
    if args.target is None:
        sys.stderr.write("You did not specify a target directory with --target.\n")
        sys.exit(1)

    try:
        list_file = open(args.list, "r")
    except IOError:
        sys.stderr.write("The specified list file doesn't exist.\n")
        sys.exit(1)

    with list_file:
        # Blank lines are dropped: an empty release name would be a
        # substring of every title and so match every search result.
        releases = [(release, args.target) for release in list_file.read().splitlines() if release.strip()]
sys.stdout.write("Found %d releases.\n" % len(releases))

# Look every release up on Nzbindex first and fall back to Binsearch; a
# release that neither source can find is reported and skipped.
for release_name, target_dir in releases:
    for spider_class in (NzbindexSpider, BinsearchSpider):
        try:
            spider = spider_class()
            results = spider.find(release_name)
        except NotFoundException:
            # Try the next source, if any.
            continue
        break
    else:
        # Every source raised NotFoundException.
        sys.stderr.write("Could not find release %s\n" % release_name)
        continue

    # Process result
    for result in results:
        result.show()

@ -0,0 +1,31 @@
import requests, random
# Pool of browser User-Agent strings; ModifiedSession picks one at random for
# each new session. The entries are arbitrary examples and can be replaced
# with any other list of User-Agent strings.
user_agents = [
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
"Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
]
class NotFoundException(Exception):
    """Signals that a search produced no usable results."""
class ModifiedSession(requests.Session):
    """A requests session that identifies itself with a random User-Agent."""

    def __init__(self, *args, **kwargs):
        """Set up the underlying session, then pick this session's User-Agent.

        All arguments are forwarded unchanged to requests.Session.
        """
        super(ModifiedSession, self).__init__(*args, **kwargs)
        # Chosen once per session from the module-level user_agents pool.
        chosen_agent = random.choice(user_agents)
        self.headers['user-agent'] = chosen_agent
def download_file(self, request, target):
    """Save the body of an HTTP response to a file.

    Parameters:
        self: not used by the body.
            NOTE(review): this function is imported by name from `shared`
            and called as download_file(response, path) elsewhere, which
            does not match this three-parameter signature -- confirm
            whether `self` should be dropped.
        request: the response object to save (despite the name); it must
            provide `status_code` and `iter_content()`.
        target: path of the file to write; opened in binary mode and
            overwritten if it already exists.

    Nothing is written, and no error is raised, unless the response status
    code is exactly 200.
    """
    if request.status_code != 200:
        return

    # `with` guarantees the handle is closed even if a write fails midway.
    with open(target, "wb") as f:
        # iter_content() defaults to 1-byte chunks; a larger chunk size
        # writes the same bytes with far fewer iterations.
        for chunk in request.iter_content(chunk_size=8192):
            f.write(chunk)

@ -1,2 +1 @@
class NzbindexSpider(object):
    """Empty placeholder class; it defines no behaviour of its own."""

@ -1,2 +1,7 @@
from shared import NotFoundException
class BinsearchSpider(object):
    """Search source for Binsearch; the actual scraping is not written yet."""

    def find(self, name):
        """Search for a release by name.

        Parameters:
            name: the release name to look for.

        Raises:
            NotFoundException: always, because no search is implemented
                yet. Raising it (instead of lacking the method) lets
                callers that use this spider as a fallback reach their
                normal not-found handling rather than failing with an
                AttributeError.
        """
        raise NotFoundException("No results were found.")
class BinsearchResult(object):
    """Empty placeholder class; it defines no behaviour of its own."""

@ -1 +1,63 @@
from shared import NotFoundException, ModifiedSession, download_file
import requests, re, HTMLParser
class NzbindexSpider(object):
    """Finds NZB download links by scraping the nzbindex.com search page."""

    # Patterns are compiled once at class creation. They are raw strings so
    # the backslashes reach the regex engine untouched; the pattern text is
    # unchanged.
    _ROW_RE = re.compile(r"<tr[^>]+>(.*?)<\/tr>", re.DOTALL)
    _LABEL_RE = re.compile(r"<label[^>]+>(.*?)<\/label>", re.DOTALL)
    _TAG_RE = re.compile(r"<[^>]+>")
    _NZB_URL_RE = re.compile(r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb')

    def find(self, name):
        """Search nzbindex.com for a release.

        A new session is created on every call and stored as `self.session`
        so that results can reuse it for downloading.

        Parameters:
            name: release name to search for. A result is kept only when
                this name occurs (case-insensitively) in the result title.

        Returns:
            A non-empty list of NzbindexResult objects.

        Raises:
            NotFoundException: when no matching, downloadable result exists.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession()

        # NOTE(review): verify=False disables TLS certificate verification
        # for both requests below -- confirm this is intentional.
        # The agreement form is posted first, before any search is made.
        self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)

        response = self.session.get("https://nzbindex.com/search/", params={
            "q": name,
            "age": "",
            "max": "50",
            "minage": "",
            "sort": "agedesc",
            "minsize": "100",
            "maxsize": "",
            "dq": "",
            "poster": "",
            "nfo": "",
            "hasnfo": "1",
            "complete": "1",
            "hidespam": "1",
            "more": "1"
        }, verify=False)

        wanted = name.lower()
        search_results = []

        for row in self._ROW_RE.findall(response.text):
            if 'class="threat"' in row:
                # Password protected or otherwise unsuitable for download
                continue

            label = self._LABEL_RE.search(row)

            if label is None:
                continue

            # Strip nested markup from the label, then decode HTML entities.
            title = parser.unescape(self._TAG_RE.sub("", label.group(1)))

            if wanted not in title.lower():
                continue

            link = self._NZB_URL_RE.search(row)

            if link is not None:
                # The result keeps a reference to this spider so it can
                # download through the same session; NzbindexResult requires
                # it as its third constructor argument.
                search_results.append(NzbindexResult(title, link.group(0), self))

        if not search_results:
            raise NotFoundException("No results were found.")

        return search_results
class NzbindexResult(object):
    """A single search hit: a release title plus the URL of its NZB file."""

    def __init__(self, title, url, spider):
        """Store the hit's details.

        Parameters:
            title: title of the search result.
            url: URL of the NZB file.
            spider: the spider that produced this result; its `session`
                attribute is used by download().
        """
        self.title = title
        self.url = url
        self.spider = spider

    def show(self):
        """Print the result as "<title> -> <url>"."""
        # A single pre-formatted argument prints identically whether print
        # is a statement (Python 2) or a function (Python 3).
        print("%s -> %s" % (self.title, self.url))

    def download(self, target_path):
        """Fetch the NZB through the spider's session and save it.

        Parameters:
            target_path: path of the file to write the NZB to.
        """
        response = self.spider.session.get(self.url)
        # download_file is declared as (self, request, target) and never
        # uses its first parameter, so a placeholder fills that slot; the
        # previous two-argument call raised a TypeError.
        download_file(None, response, target_path)

Loading…
Cancel
Save