Update the shared code and all resolvers for the new class-based model

master
Sven Slootweg 12 years ago
parent 92bceed168
commit 88e6819bf2

@ -1,21 +1,28 @@
import re import re
from resolvers import * import resolvers
def resolve(url): def resolve(url):
if re.match("https?:\/\/(www\.)?putlocker\.com", url) is not None: if re.match("https?:\/\/(www\.)?putlocker\.com", url) is not None:
return putlocker.resolve(url) task = resolvers.PutlockerTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?sockshare\.com", url) is not None: elif re.match("https?:\/\/(www\.)?sockshare\.com", url) is not None:
return sockshare.resolve(url) task = resolvers.SockshareTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?1channel\.ch\/external\.php", url) is not None: elif re.match("https?:\/\/(www\.)?1channel\.ch\/external\.php", url) is not None:
return onechannel.resolve(url) task = resolvers.OneChannelTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?youtube\.com\/watch\?", url) is not None: elif re.match("https?:\/\/(www\.)?youtube\.com\/watch\?", url) is not None:
return youtube.resolve(url) task = resolvers.YoutubeTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?filebox\.com\/[a-zA-Z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?filebox\.com\/[a-zA-Z0-9]+", url) is not None:
return filebox.resolve(url) task = resolvers.FileboxTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?pastebin\.com\/[a-zA-Z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?pastebin\.com\/[a-zA-Z0-9]+", url) is not None:
return pastebin.resolve(url) task = resolvers.PastebinTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?mediafire\.com\/\?[a-z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?mediafire\.com\/\?[a-z0-9]+", url) is not None:
return mediafire.resolve(url) task = resolvers.MediafireTask(url)
return task.run()
else: else:
return {} return {}
@ -25,10 +32,10 @@ def recurse(url):
while True: while True:
result = resolve(url) result = resolve(url)
if result == {}: if result.state != "finished":
return previous_result return previous_result
elif 'url' not in result: elif result.result_type != "url":
return result return result
url = result['url'] url = result.results['url']
previous_result = result previous_result = result

@ -1,2 +1,9 @@
def resolve(input): from resolv.shared import Task
return {'dummy': input}
class DummyTask(Task):
    """Placeholder resolver that echoes the task's URL back as its result."""

    result_type = "dummy"

    def run(self):
        """Store the URL under the 'dummy' key, mark the task finished, and return self."""
        self.state = "finished"
        self.results = dict(dummy=self.url)
        return self

@ -1,70 +1,43 @@
import re, time, urllib2 import re, time, urllib2
from resolv.shared import ResolverError from resolv.shared import ResolverError, Task
def resolve(url): class FileboxTask(Task):
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url) result_type = "video"
if matches is None: def run(self):
raise ResolverError("The provided URL is not a valid Filebox.com URL.") matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", self.url)
video_id = matches.group(2) if matches is None:
self.state = "invalid"
try: raise ResolverError("The provided URL is not a valid Filebox.com URL.")
contents = urllib2.urlopen("http://www.filebox.com/embed-%s-970x543.html" % video_id).read()
except: video_id = matches.group(2)
raise ResolverError("Could not retrieve the video page.")
try:
matches = re.search("url: '([^']+)',", contents) contents = self.fetch_page("http://www.filebox.com/embed-%s-970x543.html" % video_id)
except urllib2.URLError, e:
if matches is None: self.state = "failed"
raise ResolverError("No video was found on the specified URL.") raise ResolverError("Could not retrieve the video page.")
video_file = matches.group(1) matches = re.search("url: '([^']+)',", contents)
stream_dict = { if matches is None:
'url' : video_file, self.state = "invalid"
'quality' : "unknown", raise ResolverError("No video was found on the specified URL. The Filebox.com resolver currently only supports videos.")
'priority' : 1,
'format' : "unknown" video_file = matches.group(1)
}
stream_dict = {
return { 'title': "", 'videos': [stream_dict] } 'url' : video_file,
'quality' : "unknown",
def resolve2(url): 'priority' : 1,
# This is a fallback function in case no video could be found through the resolve() method. 'format' : "unknown"
# It's not recommended to use it, as it introduces a 5 second wait. }
try: self.results = {
import mechanize 'title': "",
except ImportError: 'videos': [stream_dict]
raise ResolverError("The Python mechanize module is required to resolve Filebox.com URLs.") }
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url) self.state = "finished"
return self
if matches is None:
raise ResolverError("The provided URL is not a valid Filebox.com URL.")
try:
browser = mechanize.Browser()
browser.set_handle_robots(False)
browser.open(url)
except:
raise ResolverError("The Filebox.com site could not be reached.")
time.sleep(6)
try:
browser.select_form(nr=0)
result = browser.submit()
page = result.read()
except Exception, e:
raise ResolverError("The file was removed, or the URL is incorrect.")
matches = re.search("this\.play\('([^']+)'\)", page)
if matches is None:
raise ResolverError("No video file was found on the given URL; the Filebox.com server for this file may be in maintenance mode, or the given URL may not be a video file. The Filebox.com resolver currently only supports video links.")
video_file = matches.group(1)
return { 'title': "", 'videos': { 'video': video_file } }

@ -1,28 +1,40 @@
import re, urllib2 import re, urllib2
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class MediafireTask(Task):
try: result_type = "file"
contents = urllib2.urlopen(url).read()
except:
raise ResolverError("Could not retrieve the specified URL.")
matches = re.search('kNO = "([^"]+)";', contents) def run(self):
try:
if matches is None: contents = self.fetch_page(self.url)
raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.") except urllib2.URLError, e:
self.state = "failed"
file_url = matches.group(1) raise ResolverError("Could not retrieve the specified URL.")
try: matches = re.search('kNO = "([^"]+)";', contents)
file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1))
except: if matches is None:
raise ResolverError("Could not find the download title.") self.state = "failed"
raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.")
file_dict = {
'url' : file_url, file_url = matches.group(1)
'priority' : 1,
'format' : "unknown" try:
} file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1))
except:
return { 'title': file_title, 'files': [file_dict] } self.state = "failed"
raise ResolverError("Could not find the download title.")
file_dict = {
'url' : file_url,
'priority' : 1,
'format' : "unknown"
}
self.results = {
'title': file_title,
'files': [file_dict]
}
self.state = "finished"
return self

@ -1,15 +1,22 @@
import re, base64 import re, base64
from resolv.shared import ResolverError from resolv.shared import ResolverError, Task
def resolve(url): class OneChannelTask(Task):
matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", url) result_type = "url"
if matches is None:
raise ResolverError("The provided URL is not a valid external 1channel URL.")
try:
real_url = base64.b64decode(matches.group(2)).strip()
except TypeError:
raise ResolverError("The provided URL is malformed.")
return { 'url': real_url } def run(self):
matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", self.url)
if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid external 1channel URL.")
try:
real_url = base64.b64decode(matches.group(2)).strip()
except TypeError:
self.state = "failed"
raise ResolverError("The provided URL is malformed.")
self.results = { 'url': real_url }
self.state = "finished"
return self

@ -1,30 +1,42 @@
import re, urllib, urllib2 import re, urllib2
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class PastebinTask(Task):
matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", url) result_type = "text"
if matches is None: def run(self):
raise ResolverError("The provided URL is not a valid Pastebin URL.") matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", self.url)
paste_id = matches.group(2) if matches is None:
self.state = "invalid"
try: raise ResolverError("The provided URL is not a valid Pastebin URL.")
contents = urllib2.urlopen(url).read()
except: paste_id = matches.group(2)
raise ResolverError("Could not retrieve the specified URL. The specified paste may not exist.")
try:
matches = re.search("<h1>([^<]+)</h1>", contents) contents = self.fetch_page(self.url)
except urllib2.URLError, e:
if matches is None: self.state = "failed"
raise ResolverError("The provided URL is not a valid paste.") raise ResolverError("Could not retrieve the specified URL. The paste may not exist.")
paste_title = unescape(matches.group(1)) matches = re.search("<h1>([^<]+)</h1>", contents)
file_dict = { if matches is None:
'url' : "http://pastebin.com/download.php?i=%s" % paste_id, self.state = "invalid"
'priority' : 1, raise ResolverError("The provided URL is not a valid paste.")
'format' : "text"
} paste_title = unescape(matches.group(1))
return { 'title': paste_title, 'files': [file_dict] } resolved = {
'url' : "http://pastebin.com/download.php?i=%s" % paste_id,
'priority' : 1,
'format' : "text"
}
self.results = {
'title': paste_title,
'files': [resolved]
}
self.state = "finished"
return self

@ -1,62 +1,78 @@
import re import re
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class PutlockerTask(Task):
try: result_type = "video"
import mechanize
except ImportError:
raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.")
matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", url) def run(self):
try:
import mechanize
except ImportError:
self.state = "failed"
raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.")
matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", self.url)
if matches is None: if matches is None:
raise ResolverError("The provided URL is not a valid PutLocker URL.") self.state = "invalid"
raise ResolverError("The provided URL is not a valid PutLocker URL.")
video_id = matches.group(3)
video_id = matches.group(3)
try:
browser = mechanize.Browser() try:
browser.set_handle_robots(False) browser = mechanize.Browser()
browser.open("http://putlocker.com/embed/%s" % video_id) browser.set_handle_robots(False)
except: browser.open("http://putlocker.com/embed/%s" % video_id)
raise ResolverError("The PutLocker site could not be reached.") except:
self.state = "failed"
try: raise ResolverError("The PutLocker site could not be reached.")
browser.select_form(nr=0)
result = browser.submit() try:
page = result.read() browser.select_form(nr=0)
except Exception, e: result = browser.submit()
raise ResolverError("The file was removed, or the URL is incorrect.") page = result.read()
except Exception, e:
matches = re.search("playlist: '([^']+)'", page) self.state = "nonexistent"
raise ResolverError("The file was removed, or the URL is incorrect.")
if matches is None:
raise ResolverError("No playlist was found on the given URL; the PutLocker server for this file may be in maintenance mode, or the given URL may not be a video file. The PutLocker resolver currently only supports video links.") matches = re.search("playlist: '([^']+)'", page)
playlist = matches.group(1) if matches is None:
raise ResolverError("No playlist was found on the given URL; the PutLocker server for this file may be in maintenance mode, or the given URL may not be a video file. The PutLocker resolver currently only supports video links.")
try:
browser.open("http://www.putlocker.com%s" % playlist) playlist = matches.group(1)
except:
raise ResolverError("The playlist file for the given URL could not be loaded.") try:
browser.open("http://www.putlocker.com%s" % playlist)
matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) except:
self.state = "failed"
if matches is None: raise ResolverError("The playlist file for the given URL could not be loaded.")
raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.")
matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read())
video_file = matches.group(1)
if matches is None:
try: self.state = "failed"
video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.")
except:
raise ResolverError("Could not find the video title.") video_file = matches.group(1)
stream_dict = { try:
'url' : video_file, video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1))
'quality' : "unknown", except:
'priority' : 1, self.state = "failed"
'format' : "unknown" raise ResolverError("Could not find the video title.")
}
stream_dict = {
return { 'title': video_title, 'videos': [stream_dict] } 'url' : video_file,
'quality' : "unknown",
'priority' : 1,
'format' : "unknown"
}
self.results = {
'title': video_title,
'videos': [stream_dict]
}
self.state = "finished"
return self

@ -1,62 +1,78 @@
import re import re
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class SockshareTask(Task):
try: result_type = "video"
import mechanize
except ImportError:
raise ResolverError("The Python mechanize module is required to resolve SockShare URLs.")
matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", url) def run(self):
try:
import mechanize
except ImportError:
self.state = "failed"
raise ResolverError("The Python mechanize module is required to resolve Sockshare URLs.")
matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", self.url)
if matches is None: if matches is None:
raise ResolverError("The provided URL is not a valid SockShare URL.") self.state = "invalid"
raise ResolverError("The provided URL is not a valid Sockshare URL.")
video_id = matches.group(3)
video_id = matches.group(3)
try:
browser = mechanize.Browser() try:
browser.set_handle_robots(False) browser = mechanize.Browser()
browser.open("http://sockshare.com/embed/%s" % video_id) browser.set_handle_robots(False)
except: browser.open("http://sockshare.com/embed/%s" % video_id)
raise ResolverError("The SockShare site could not be reached.") except:
self.state = "failed"
try: raise ResolverError("The Sockshare site could not be reached.")
browser.select_form(nr=0)
result = browser.submit() try:
page = result.read() browser.select_form(nr=0)
except Exception, e: result = browser.submit()
raise ResolverError("The file was removed, or the URL is incorrect.") page = result.read()
except Exception, e:
matches = re.search("playlist: '([^']+)'", page) self.state = "nonexistent"
raise ResolverError("The file was removed, or the URL is incorrect.")
if matches is None:
raise ResolverError("No playlist was found on the given URL; the SockShare server for this file may be in maintenance mode, or the given URL may not be a video file. The SockShare resolver currently only supports video links.") matches = re.search("playlist: '([^']+)'", page)
playlist = matches.group(1) if matches is None:
raise ResolverError("No playlist was found on the given URL; the Sockshare server for this file may be in maintenance mode, or the given URL may not be a video file. The Sockshare resolver currently only supports video links.")
try:
browser.open("http://www.sockshare.com%s" % playlist) playlist = matches.group(1)
except:
raise ResolverError("The playlist file for the given URL could not be loaded.") try:
browser.open("http://www.sockshare.com%s" % playlist)
matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) except:
self.state = "failed"
if matches is None: raise ResolverError("The playlist file for the given URL could not be loaded.")
raise ResolverError("The playlist file does not contain any video URLs. The SockShare resolver currently only supports video links.")
matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read())
video_file = matches.group(1)
if matches is None:
try: self.state = "failed"
video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) raise ResolverError("The playlist file does not contain any video URLs. The Sockshare resolver currently only supports video links.")
except:
raise ResolverError("Could not find the video title.") video_file = matches.group(1)
stream_dict = { try:
'url' : video_file, video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1))
'quality' : "unknown", except:
'priority' : 1, self.state = "failed"
'format' : "unknown" raise ResolverError("Could not find the video title.")
}
stream_dict = {
return { 'title': video_title, 'videos': [stream_dict] } 'url' : video_file,
'quality' : "unknown",
'priority' : 1,
'format' : "unknown"
}
self.results = {
'title': video_title,
'videos': [stream_dict]
}
self.state = "finished"
return self

@ -1,88 +1,117 @@
import re, urllib, urllib2 import re, urllib, urllib2, urlparse
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class YoutubeTask(Task):
try: result_type = "video"
contents = urllib2.urlopen(url).read()
except:
raise ResolverError("Could not retrieve the specified URL.")
map_start = "url_encoded_fmt_stream_map=" extra_headers = {
map_end = "\\u0026amp;" 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us,en;q=0.5'
}
try: def run(self):
pos_start = contents.index(map_start) + len(map_start) + 6 try:
snippet = contents[pos_start:] contents = self.fetch_page(self.url)
except ValueError: except urllib2.URLError, e:
raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?") self.state = "failed"
raise ResolverError("Could not retrieve the specified URL.")
try:
pos_end = snippet.index(map_end) map_start = "url_encoded_fmt_stream_map="
stream_map = snippet[:pos_end] map_end = "\\u0026amp;"
except ValueError:
raise ResolverError("The ending position for the YouTube player configuration could not be found.")
try:
stream_map = urllib.unquote(stream_map)
streams = stream_map.split(',url=')
except:
raise ResolverError("The YouTube player configuration is corrupted.")
stream_pool = []
for stream in streams:
fields = stream.split('&')
if len(fields) < 5: try:
raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.") pos_start = contents.index(map_start) + len(map_start)
snippet = contents[pos_start:]
except ValueError:
self.state = "failed"
raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?")
video_url = urllib.unquote(fields[0]) try:
quality = fields[1].split("=")[1] pos_end = snippet.index(map_end)
fallback_host = fields[2].split("=")[1] stream_map = snippet[:pos_end]
mimetype = urllib.unquote(fields[3].split("=")[1]) except ValueError:
itag = fields[4].split("=", 2)[1] self.state = "failed"
raise ResolverError("The ending position for the YouTube player configuration could not be found.")
if mimetype.startswith("video/mp4"): try:
video_format = "mp4" stream_map = urllib.unquote(stream_map)
elif mimetype.startswith("video/x-flv"): streams = stream_map.split(',')
video_format = "flv" except:
elif mimetype.startswith("video/3gpp"): self.state = "failed"
video_format = "3gp" raise ResolverError("The YouTube player configuration is corrupted.")
elif mimetype.startswith("video/webm"):
video_format = "webm"
else:
video_format = "unknown"
if quality == "small": stream_pool = []
video_quality = "240p"
video_priority = 5
elif quality == "medium":
video_quality = "360p"
video_priority = 4
elif quality == "large":
video_quality = "480p"
video_priority = 3
elif quality == "hd720":
video_quality = "720p"
video_priority = 2
elif quality == "hd1080":
video_quality = "1080p"
video_priority = 1
else:
video_quality = "unknown"
stream_dict = { for stream in streams:
'url' : video_url, fields = urlparse.parse_qs(stream)
'quality' : video_quality,
'priority' : video_priority, if len(fields) < 6:
'format' : video_format self.state = "failed"
raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.")
signature = fields['sig'][0]
video_url = "%s&signature=%s" % (fields['url'][0], signature)
quality = fields['quality'][0]
fallback_host = fields['fallback_host'][0]
mimetype = fields['type'][0]
itag = fields['itag'][0]
if mimetype.startswith("video/mp4"):
video_format = "mp4"
elif mimetype.startswith("video/x-flv"):
video_format = "flv"
elif mimetype.startswith("video/3gpp"):
video_format = "3gp"
elif mimetype.startswith("video/webm"):
video_format = "webm"
else:
video_format = "unknown"
if quality == "small":
video_quality = "240p"
video_priority = 5
elif quality == "medium":
video_quality = "360p"
video_priority = 4
elif quality == "large":
video_quality = "480p"
video_priority = 3
elif quality == "hd720":
video_quality = "720p"
video_priority = 2
elif quality == "hd1080":
video_quality = "1080p"
video_priority = 1
else:
video_quality = "unknown"
video_priority = 0
print "UNKNOWN: %s" % quality
stream_dict = {
'url' : video_url,
'quality' : video_quality,
'priority' : video_priority,
'format' : video_format,
'extra' : {
'itag': itag,
'mimetype': mimetype,
'fallback_host': fallback_host
}
}
stream_pool.append(stream_dict)
try:
video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1))
except:
self.state = "failed"
raise ResolverError("Could not find the video title.")
self.results = {
'title': video_title,
'videos': stream_pool
} }
stream_pool.append(stream_dict) self.state = "finished"
return self
try:
video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1))
except:
raise ResolverError("Could not find the video title.")
return { 'title': video_title, 'videos': stream_pool }

@ -1,4 +1,5 @@
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
import cookielib, urllib2
import sys import sys
reload(sys) reload(sys)
@ -11,5 +12,45 @@ class ResolverError(Exception):
def __str__(self): def __str__(self):
return repr(self.val) return repr(self.val)
class Task():
    """Base class for one URL-resolution job.

    Each instance owns its own cookie jar and URL opener. Subclasses set
    ``result_type``, may set ``extra_headers``, and override ``run()`` to fill
    in ``results`` and ``state``.
    """

    # Class-level defaults; instances overwrite these as they progress.
    captcha = None
    cookiejar = None
    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11"
    opener = None
    results = None
    state = "none"
    url = ""
    result_type = "none"
    # Additional request headers; subclasses may replace this dict.
    extra_headers = {}

    def __init__(self, url):
        """Prepare a cookie-aware opener for ``url``.

        The opener sends ``extra_headers`` plus a ``User-agent`` header taken
        from ``useragent``.
        """
        self.cookiejar = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))

        # Copy before adding User-agent: extra_headers is a class attribute, so
        # writing into it directly would modify the dict shared by every
        # instance (and by every subclass that does not override it).
        headers = dict(self.extra_headers)
        headers['User-agent'] = self.useragent
        self.extra_headers = headers
        self.opener.addheaders = list(headers.items())

        self.url = url

    def run(self):
        """Default resolution: report the URL itself as the result.

        Returns this task, with ``state`` set to "finished".
        """
        self.state = "finished"
        self.results = self.url
        return self

    def fetch_page(self, url):
        """Fetch ``url`` through this task's opener and return the response body.

        Errors raised by the opener propagate to the caller.
        """
        response = self.opener.open(url)
        try:
            return response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
class Captcha():
    """Holds the image and audio values for a CAPTCHA; both default to ""."""

    image = ""
    audio = ""

    # The original signature omitted ``self``, so the instance was bound to
    # ``image`` and the body failed on the undefined name ``self``.
    def __init__(self, image="", audio=""):
        """Store the given ``image`` and ``audio`` values on the instance."""
        self.image = image
        self.audio = audio
def unescape(s): def unescape(s):
return HTMLParser.unescape.__func__(HTMLParser, s) return HTMLParser.unescape.__func__(HTMLParser, s)

Loading…
Cancel
Save