Code so far
This commit is contained in:
parent
d502f11151
commit
60825fd9fe
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*.pyc
|
||||
config.json
|
100
main.py
100
main.py
|
@ -1,4 +1,102 @@
|
|||
import re, oursql, requests
|
||||
import re, oursql, requests, sys, json, shlex, argparse
|
||||
|
||||
from sources.nzbindex import NzbindexSpider
|
||||
from sources.binsearch import BinsearchSpider
|
||||
from shared import NotFoundException
|
||||
|
||||
# Command-line interface. --config and --list select where release names come
# from; --target and --iplist tune how/where the NZBs are fetched.
parser = argparse.ArgumentParser(description="Automatically download NZBs for releases")
parser.add_argument("--config", dest="config", action="store", help="Use a configuration file to match against the database as source")
parser.add_argument("--list", dest="list", action="store", help="Use a newline-delimited list of releases as source")
parser.add_argument("--target", dest="target", action="store", help="Where to save the NZBs (only needed in list mode)")
# --iplist must have its own destination: when it shared dest="list" with
# --list, each option overwrote the other and --iplist alone looked like a
# release list.
parser.add_argument("--iplist", dest="iplist", action="store", help="Bind every request to a random IP from a newline-delimited list")
|
||||
args = parser.parse_args()

# Decide where the release names come from. --config is checked first, so it
# wins when both --config and --list are supplied.
if args.config is not None:
    mode = "config"
elif args.list is not None:
    mode = "list"
else:
    sys.stderr.write("You must specify either a configuration file or a release list.\n")
    # sys.exit() instead of the site-provided exit(), which is only meant for
    # the interactive interpreter.
    sys.exit(1)
|
||||
|
||||
# Build `releases`: a list of (release_name, target_directory) tuples, either
# from database searches (config mode) or from a plain text file (list mode).
if mode == "config":
    # Database settings are always read from config.json in the working
    # directory; --config names the file holding the search queries.
    try:
        with open("config.json", "r") as conf_file:
            conf = json.load(conf_file)
    except (IOError, ValueError):
        # IOError: file missing/unreadable. ValueError: contents are not JSON.
        sys.stderr.write("You must have a valid config.json.\n")
        sys.exit(1)

    # The table name is spliced into the SQL text below (identifiers cannot be
    # bound as query parameters), so restrict it to a safe character set.
    if not re.match("^[a-zA-Z0-9_-]+$", conf['db']['table']):
        sys.stderr.write("Table name must be a-z, A-Z, 0-9, _, -\n")
        sys.exit(1)

    try:
        with open(args.config, "r") as searchconf_file:
            queries = searchconf_file.read().splitlines()
    except IOError:
        sys.stderr.write("The specified configuration file doesn't exist.\n")
        sys.exit(1)

    db = oursql.connect(host=conf['db']['host'], user=conf['db']['user'], passwd=conf['db']['pass'], db=conf['db']['db'], autoreconnect=True)
    c = db.cursor()

    releases = []

    for query in queries:
        if not query.strip():
            # Blank lines carry no query.
            continue

        # Each line holds three shell-style tokens: title, section, target.
        # shlex.split raises ValueError on unbalanced quotes, and the unpacking
        # raises it when the token count is not exactly three.
        try:
            title, section, target = shlex.split(query)
        except ValueError:
            sys.stderr.write("Skipping malformed query (expected: title section target): %s\n" % query)
            continue

        # A "-" in the title or section position means "do not filter on it".
        conditions = []
        values = []

        if title != "-":
            conditions.append("`release` LIKE ?")
            values.append("%" + title + "%")

        if section != "-":
            conditions.append("`section` LIKE ?")
            values.append("%" + section + "%")

        # Only rows whose `time` lies more than 86400 seconds in the past.
        conditions.append("`time` < (UNIX_TIMESTAMP(NOW()) - 86400)")

        # Backticks keep table names containing "-" (allowed above) valid SQL.
        db_query = "SELECT `release` FROM `%s` WHERE %s" % (conf['db']['table'], " AND ".join(conditions))

        c.execute(db_query, values)
        releases.extend((row[0], target) for row in c)
elif mode == "list":
    if args.target is None:
        sys.stderr.write("You did not specify a target directory with --target.\n")
        sys.exit(1)

    try:
        with open(args.list, "r") as list_file:
            release_names = list_file.read().splitlines()
    except IOError:
        sys.stderr.write("The specified list file doesn't exist.\n")
        sys.exit(1)

    # Blank lines are dropped so they are not searched as empty release names.
    releases = [(release, args.target) for release in release_names if release.strip()]
|
||||
|
||||
sys.stdout.write("Found %d releases.\n" % len(releases))

# Look every release up on the indexers in order of preference; the first
# spider that does not raise NotFoundException supplies the results.
for release_name, target_dir in releases:
    # NOTE: target_dir is unpacked but not used yet; results are only shown.
    for spider_class in (NzbindexSpider, BinsearchSpider):
        try:
            results = spider_class().find(release_name)
        except NotFoundException:
            continue
        break
    else:
        # Every spider raised NotFoundException.
        sys.stderr.write("Could not find release %s\n" % release_name)
        continue

    # Process result
    for result in results:
        result.show()
|
||||
|
|
31
shared.py
Normal file
31
shared.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
import requests, random
|
||||
|
||||
# Pool of browser User-Agent strings; ModifiedSession picks one at random for
# each new session. Any other list of User-Agent strings may be substituted.
user_agents = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
]
|
||||
|
||||
class NotFoundException(Exception):
    """Raised when a search for a release produces no matching results."""
|
||||
|
||||
def download_file(request, target):
    """Save the body of an HTTP response to the file at *target*.

    Nothing is written (and no file is created) unless the response status
    code is 200.

    request -- a response object exposing ``status_code`` and ``iter_content``
    target  -- path of the file to (over)write, opened in binary mode
    """
    if request.status_code != 200:
        return

    # The with-block closes the file even if reading the body fails midway.
    with open(target, "wb") as f:
        # iter_content() defaults to 1-byte chunks; ask for larger ones.
        for chunk in request.iter_content(chunk_size=8192):
            f.write(chunk)


class ModifiedSession(requests.Session):
    """requests.Session that sends a randomly chosen User-Agent header."""

    def __init__(self, *args, **kwargs):
        requests.Session.__init__(self, *args, **kwargs)
        self.headers['user-agent'] = random.choice(user_agents)

    def download_file(self, request, target):
        """Save *request*'s body to *target*; see the module-level download_file."""
        # Inside a method the bare name resolves to the module-level function.
        download_file(request, target)
|
|
@ -1,2 +1 @@
|
|||
class NzbindexSpider(object):
    """Placeholder class; no attributes or methods are defined on it here."""
|
||||
|
||||
|
|
|
@ -1,2 +1,7 @@
|
|||
from shared import NotFoundException
|
||||
|
||||
class BinsearchSpider(object):
    """Placeholder class; no attributes or methods are defined on it yet."""
|
||||
|
||||
class BinsearchResult(object):
    """Placeholder class; no attributes or methods are defined on it yet."""
|
||||
|
|
|
@ -1 +1,63 @@
|
|||
from shared import NotFoundException, ModifiedSession, download_file
|
||||
import requests, re, HTMLParser
|
||||
|
||||
class NzbindexSpider(object):
    """Searches nzbindex.com for NZB files belonging to a release."""

    def find(self, name):
        """Return NzbindexResult objects for search hits matching *name*.

        A new ModifiedSession is created and kept on ``self.session`` so the
        returned results can reuse it for downloading. A hit is kept only when
        its row is not marked ``class="threat"``, its label text contains
        *name* (case-insensitively) and the row holds an .nzb download link.

        Raises NotFoundException when no hit qualifies.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession()

        # NOTE(review): verify=False turns off TLS certificate verification
        # for both requests below; confirm this is still required.
        self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)

        response = self.session.get("https://nzbindex.com/search/", params={
            "q": name,
            "age": "",
            "max": "50",
            "minage": "",
            "sort": "agedesc",
            "minsize": "100",
            "maxsize": "",
            "dq": "",
            "poster": "",
            "nfo": "",
            "hasnfo": "1",
            "complete": "1",
            "hidespam": "1",
            "more": "1"
        }, verify=False)

        search_results = []
        wanted = name.lower()

        # Every table row of the result page is a candidate hit.
        for row in re.findall(r"<tr[^>]+>(.*?)<\/tr>", response.text, re.DOTALL):
            if 'class="threat"' in row:
                # Password protected or otherwise unsuitable for download
                continue

            label = re.search(r"<label[^>]+>(.*?)<\/label>", row, re.DOTALL)
            if label is None:
                continue

            # Strip nested tags from the label, then decode HTML entities.
            title = parser.unescape(re.sub("<[^>]+>", "", label.group(1)))
            if wanted not in title.lower():
                continue

            link = re.search(r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb', row)
            if link is None:
                continue

            # The result needs this spider to reach the session when downloading.
            search_results.append(NzbindexResult(title, link.group(0), self))

        if len(search_results) == 0:
            raise NotFoundException("No results were found.")

        return search_results
|
||||
class NzbindexResult(object):
    """One downloadable search hit: a title plus the URL of its .nzb file."""

    def __init__(self, title, url, spider):
        """Store the hit's *title* and *url* and the *spider* that found it."""
        self.title = title
        self.url = url
        # Kept so download() can reuse the spider's HTTP session.
        self.spider = spider

    def show(self):
        """Print the result to stdout as ``<title> -> <url>``."""
        # Parenthesised single argument: same output on Python 2 and 3.
        print("%s -> %s" % (self.title, self.url))

    def download(self, target_path):
        """Fetch the NZB through the spider's session and save it to *target_path*."""
        session = self.spider.session
        # download_file is a method of the session (ModifiedSession).
        session.download_file(session.get(self.url), target_path)
|
||||
|
|
Loading…
Reference in a new issue