Update the shared code and all resolvers for the new class-based model

master
Sven Slootweg 12 years ago
parent 92bceed168
commit 88e6819bf2

@ -1,21 +1,28 @@
import re import re
from resolvers import * import resolvers
def resolve(url): def resolve(url):
if re.match("https?:\/\/(www\.)?putlocker\.com", url) is not None: if re.match("https?:\/\/(www\.)?putlocker\.com", url) is not None:
return putlocker.resolve(url) task = resolvers.PutlockerTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?sockshare\.com", url) is not None: elif re.match("https?:\/\/(www\.)?sockshare\.com", url) is not None:
return sockshare.resolve(url) task = resolvers.SockshareTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?1channel\.ch\/external\.php", url) is not None: elif re.match("https?:\/\/(www\.)?1channel\.ch\/external\.php", url) is not None:
return onechannel.resolve(url) task = resolvers.OneChannelTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?youtube\.com\/watch\?", url) is not None: elif re.match("https?:\/\/(www\.)?youtube\.com\/watch\?", url) is not None:
return youtube.resolve(url) task = resolvers.YoutubeTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?filebox\.com\/[a-zA-Z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?filebox\.com\/[a-zA-Z0-9]+", url) is not None:
return filebox.resolve(url) task = resolvers.FileboxTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?pastebin\.com\/[a-zA-Z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?pastebin\.com\/[a-zA-Z0-9]+", url) is not None:
return pastebin.resolve(url) task = resolvers.PastebinTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?mediafire\.com\/\?[a-z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?mediafire\.com\/\?[a-z0-9]+", url) is not None:
return mediafire.resolve(url) task = resolvers.MediafireTask(url)
return task.run()
else: else:
return {} return {}
@ -25,10 +32,10 @@ def recurse(url):
while True: while True:
result = resolve(url) result = resolve(url)
if result == {}: if result.state != "finished":
return previous_result return previous_result
elif 'url' not in result: elif result.result_type != "url":
return result return result
url = result['url'] url = result.results['url']
previous_result = result previous_result = result

@ -1,2 +1,9 @@
def resolve(input): from resolv.shared import Task
return {'dummy': input}
class DummyTask(Task):
    """Placeholder resolver that performs no lookup at all.

    Running the task simply echoes the task's own URL back under the
    'dummy' key and marks the task as finished.
    """

    result_type = "dummy"

    def run(self):
        """Store the URL as the result, flag completion, and return self."""
        echoed = dict(dummy=self.url)
        self.state = "finished"
        self.results = echoed
        return self

@ -1,70 +1,43 @@
import re, time, urllib2 import re, time, urllib2
from resolv.shared import ResolverError from resolv.shared import ResolverError, Task
def resolve(url): class FileboxTask(Task):
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url) result_type = "video"
if matches is None: def run(self):
raise ResolverError("The provided URL is not a valid Filebox.com URL.") matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", self.url)
video_id = matches.group(2) if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid Filebox.com URL.")
try: video_id = matches.group(2)
contents = urllib2.urlopen("http://www.filebox.com/embed-%s-970x543.html" % video_id).read()
except:
raise ResolverError("Could not retrieve the video page.")
matches = re.search("url: '([^']+)',", contents) try:
contents = self.fetch_page("http://www.filebox.com/embed-%s-970x543.html" % video_id)
except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the video page.")
if matches is None: matches = re.search("url: '([^']+)',", contents)
raise ResolverError("No video was found on the specified URL.")
video_file = matches.group(1) if matches is None:
self.state = "invalid"
raise ResolverError("No video was found on the specified URL. The Filebox.com resolver currently only supports videos.")
stream_dict = { video_file = matches.group(1)
'url' : video_file,
'quality' : "unknown",
'priority' : 1,
'format' : "unknown"
}
return { 'title': "", 'videos': [stream_dict] } stream_dict = {
'url' : video_file,
'quality' : "unknown",
'priority' : 1,
'format' : "unknown"
}
def resolve2(url): self.results = {
# This is a fallback function in case no video could be found through the resolve() method. 'title': "",
# It's not recommended to use it, as it introduces a 5 second wait. 'videos': [stream_dict]
}
try: self.state = "finished"
import mechanize return self
except ImportError:
raise ResolverError("The Python mechanize module is required to resolve Filebox.com URLs.")
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url)
if matches is None:
raise ResolverError("The provided URL is not a valid Filebox.com URL.")
try:
browser = mechanize.Browser()
browser.set_handle_robots(False)
browser.open(url)
except:
raise ResolverError("The Filebox.com site could not be reached.")
time.sleep(6)
try:
browser.select_form(nr=0)
result = browser.submit()
page = result.read()
except Exception, e:
raise ResolverError("The file was removed, or the URL is incorrect.")
matches = re.search("this\.play\('([^']+)'\)", page)
if matches is None:
raise ResolverError("No video file was found on the given URL; the Filebox.com server for this file may be in maintenance mode, or the given URL may not be a video file. The Filebox.com resolver currently only supports video links.")
video_file = matches.group(1)
return { 'title': "", 'videos': { 'video': video_file } }

@ -1,28 +1,40 @@
import re, urllib2 import re, urllib2
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class MediafireTask(Task):
try: result_type = "file"
contents = urllib2.urlopen(url).read()
except:
raise ResolverError("Could not retrieve the specified URL.")
matches = re.search('kNO = "([^"]+)";', contents) def run(self):
try:
contents = self.fetch_page(self.url)
except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the specified URL.")
if matches is None: matches = re.search('kNO = "([^"]+)";', contents)
raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.")
file_url = matches.group(1) if matches is None:
self.state = "failed"
raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.")
try: file_url = matches.group(1)
file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1))
except:
raise ResolverError("Could not find the download title.")
file_dict = { try:
'url' : file_url, file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1))
'priority' : 1, except:
'format' : "unknown" self.state = "failed"
} raise ResolverError("Could not find the download title.")
return { 'title': file_title, 'files': [file_dict] } file_dict = {
'url' : file_url,
'priority' : 1,
'format' : "unknown"
}
self.results = {
'title': file_title,
'files': [file_dict]
}
self.state = "finished"
return self

@ -1,15 +1,22 @@
import re, base64 import re, base64
from resolv.shared import ResolverError from resolv.shared import ResolverError, Task
def resolve(url): class OneChannelTask(Task):
matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", url) result_type = "url"
if matches is None: def run(self):
raise ResolverError("The provided URL is not a valid external 1channel URL.") matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", self.url)
try: if matches is None:
real_url = base64.b64decode(matches.group(2)).strip() self.state = "invalid"
except TypeError: raise ResolverError("The provided URL is not a valid external 1channel URL.")
raise ResolverError("The provided URL is malformed.")
return { 'url': real_url } try:
real_url = base64.b64decode(matches.group(2)).strip()
except TypeError:
self.state = "failed"
raise ResolverError("The provided URL is malformed.")
self.results = { 'url': real_url }
self.state = "finished"
return self

@ -1,30 +1,42 @@
import re, urllib, urllib2 import re, urllib2
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class PastebinTask(Task):
matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", url) result_type = "text"
if matches is None: def run(self):
raise ResolverError("The provided URL is not a valid Pastebin URL.") matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", self.url)
paste_id = matches.group(2) if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid Pastebin URL.")
try: paste_id = matches.group(2)
contents = urllib2.urlopen(url).read()
except:
raise ResolverError("Could not retrieve the specified URL. The specified paste may not exist.")
matches = re.search("<h1>([^<]+)</h1>", contents) try:
contents = self.fetch_page(self.url)
except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the specified URL. The paste may not exist.")
if matches is None: matches = re.search("<h1>([^<]+)</h1>", contents)
raise ResolverError("The provided URL is not a valid paste.")
paste_title = unescape(matches.group(1)) if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid paste.")
file_dict = { paste_title = unescape(matches.group(1))
'url' : "http://pastebin.com/download.php?i=%s" % paste_id,
'priority' : 1,
'format' : "text"
}
return { 'title': paste_title, 'files': [file_dict] } resolved = {
'url' : "http://pastebin.com/download.php?i=%s" % paste_id,
'priority' : 1,
'format' : "text"
}
self.results = {
'title': paste_title,
'files': [resolved]
}
self.state = "finished"
return self

@ -1,62 +1,78 @@
import re import re
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class PutlockerTask(Task):
try: result_type = "video"
import mechanize
except ImportError: def run(self):
raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.") try:
import mechanize
matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", url) except ImportError:
self.state = "failed"
if matches is None: raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.")
raise ResolverError("The provided URL is not a valid PutLocker URL.")
matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", self.url)
video_id = matches.group(3)
if matches is None:
try: self.state = "invalid"
browser = mechanize.Browser() raise ResolverError("The provided URL is not a valid PutLocker URL.")
browser.set_handle_robots(False)
browser.open("http://putlocker.com/embed/%s" % video_id) video_id = matches.group(3)
except:
raise ResolverError("The PutLocker site could not be reached.") try:
browser = mechanize.Browser()
try: browser.set_handle_robots(False)
browser.select_form(nr=0) browser.open("http://putlocker.com/embed/%s" % video_id)
result = browser.submit() except:
page = result.read() self.state = "failed"
except Exception, e: raise ResolverError("The PutLocker site could not be reached.")
raise ResolverError("The file was removed, or the URL is incorrect.")
try:
matches = re.search("playlist: '([^']+)'", page) browser.select_form(nr=0)
result = browser.submit()
if matches is None: page = result.read()
raise ResolverError("No playlist was found on the given URL; the PutLocker server for this file may be in maintenance mode, or the given URL may not be a video file. The PutLocker resolver currently only supports video links.") except Exception, e:
self.state = "nonexistent"
playlist = matches.group(1) raise ResolverError("The file was removed, or the URL is incorrect.")
try: matches = re.search("playlist: '([^']+)'", page)
browser.open("http://www.putlocker.com%s" % playlist)
except: if matches is None:
raise ResolverError("The playlist file for the given URL could not be loaded.") raise ResolverError("No playlist was found on the given URL; the PutLocker server for this file may be in maintenance mode, or the given URL may not be a video file. The PutLocker resolver currently only supports video links.")
matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) playlist = matches.group(1)
if matches is None: try:
raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.") browser.open("http://www.putlocker.com%s" % playlist)
except:
video_file = matches.group(1) self.state = "failed"
raise ResolverError("The playlist file for the given URL could not be loaded.")
try:
video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read())
except:
raise ResolverError("Could not find the video title.") if matches is None:
self.state = "failed"
stream_dict = { raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.")
'url' : video_file,
'quality' : "unknown", video_file = matches.group(1)
'priority' : 1,
'format' : "unknown" try:
} video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1))
except:
return { 'title': video_title, 'videos': [stream_dict] } self.state = "failed"
raise ResolverError("Could not find the video title.")
stream_dict = {
'url' : video_file,
'quality' : "unknown",
'priority' : 1,
'format' : "unknown"
}
self.results = {
'title': video_title,
'videos': [stream_dict]
}
self.state = "finished"
return self

@ -1,62 +1,78 @@
import re import re
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class SockshareTask(Task):
try: result_type = "video"
import mechanize
except ImportError: def run(self):
raise ResolverError("The Python mechanize module is required to resolve SockShare URLs.") try:
import mechanize
matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", url) except ImportError:
self.state = "failed"
if matches is None: raise ResolverError("The Python mechanize module is required to resolve Sockshare URLs.")
raise ResolverError("The provided URL is not a valid SockShare URL.")
matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", self.url)
video_id = matches.group(3)
if matches is None:
try: self.state = "invalid"
browser = mechanize.Browser() raise ResolverError("The provided URL is not a valid Sockshare URL.")
browser.set_handle_robots(False)
browser.open("http://sockshare.com/embed/%s" % video_id) video_id = matches.group(3)
except:
raise ResolverError("The SockShare site could not be reached.") try:
browser = mechanize.Browser()
try: browser.set_handle_robots(False)
browser.select_form(nr=0) browser.open("http://sockshare.com/embed/%s" % video_id)
result = browser.submit() except:
page = result.read() self.state = "failed"
except Exception, e: raise ResolverError("The Sockshare site could not be reached.")
raise ResolverError("The file was removed, or the URL is incorrect.")
try:
matches = re.search("playlist: '([^']+)'", page) browser.select_form(nr=0)
result = browser.submit()
if matches is None: page = result.read()
raise ResolverError("No playlist was found on the given URL; the SockShare server for this file may be in maintenance mode, or the given URL may not be a video file. The SockShare resolver currently only supports video links.") except Exception, e:
self.state = "nonexistent"
playlist = matches.group(1) raise ResolverError("The file was removed, or the URL is incorrect.")
try: matches = re.search("playlist: '([^']+)'", page)
browser.open("http://www.sockshare.com%s" % playlist)
except: if matches is None:
raise ResolverError("The playlist file for the given URL could not be loaded.") raise ResolverError("No playlist was found on the given URL; the Sockshare server for this file may be in maintenance mode, or the given URL may not be a video file. The Sockshare resolver currently only supports video links.")
matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) playlist = matches.group(1)
if matches is None: try:
raise ResolverError("The playlist file does not contain any video URLs. The SockShare resolver currently only supports video links.") browser.open("http://www.sockshare.com%s" % playlist)
except:
video_file = matches.group(1) self.state = "failed"
raise ResolverError("The playlist file for the given URL could not be loaded.")
try:
video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read())
except:
raise ResolverError("Could not find the video title.") if matches is None:
self.state = "failed"
stream_dict = { raise ResolverError("The playlist file does not contain any video URLs. The Sockshare resolver currently only supports video links.")
'url' : video_file,
'quality' : "unknown", video_file = matches.group(1)
'priority' : 1,
'format' : "unknown" try:
} video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1))
except:
return { 'title': video_title, 'videos': [stream_dict] } self.state = "failed"
raise ResolverError("Could not find the video title.")
stream_dict = {
'url' : video_file,
'quality' : "unknown",
'priority' : 1,
'format' : "unknown"
}
self.results = {
'title': video_title,
'videos': [stream_dict]
}
self.state = "finished"
return self

@ -1,88 +1,117 @@
import re, urllib, urllib2 import re, urllib, urllib2, urlparse
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class YoutubeTask(Task):
try: result_type = "video"
contents = urllib2.urlopen(url).read()
except: extra_headers = {
raise ResolverError("Could not retrieve the specified URL.") 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
map_start = "url_encoded_fmt_stream_map=" 'Accept-Language': 'en-us,en;q=0.5'
map_end = "\\u0026amp;" }
try: def run(self):
pos_start = contents.index(map_start) + len(map_start) + 6 try:
snippet = contents[pos_start:] contents = self.fetch_page(self.url)
except ValueError: except urllib2.URLError, e:
raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?") self.state = "failed"
raise ResolverError("Could not retrieve the specified URL.")
try:
pos_end = snippet.index(map_end) map_start = "url_encoded_fmt_stream_map="
stream_map = snippet[:pos_end] map_end = "\\u0026amp;"
except ValueError:
raise ResolverError("The ending position for the YouTube player configuration could not be found.") try:
pos_start = contents.index(map_start) + len(map_start)
try: snippet = contents[pos_start:]
stream_map = urllib.unquote(stream_map) except ValueError:
streams = stream_map.split(',url=') self.state = "failed"
except: raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?")
raise ResolverError("The YouTube player configuration is corrupted.")
try:
stream_pool = [] pos_end = snippet.index(map_end)
stream_map = snippet[:pos_end]
for stream in streams: except ValueError:
fields = stream.split('&') self.state = "failed"
raise ResolverError("The ending position for the YouTube player configuration could not be found.")
if len(fields) < 5:
raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.") try:
stream_map = urllib.unquote(stream_map)
video_url = urllib.unquote(fields[0]) streams = stream_map.split(',')
quality = fields[1].split("=")[1] except:
fallback_host = fields[2].split("=")[1] self.state = "failed"
mimetype = urllib.unquote(fields[3].split("=")[1]) raise ResolverError("The YouTube player configuration is corrupted.")
itag = fields[4].split("=", 2)[1]
stream_pool = []
if mimetype.startswith("video/mp4"):
video_format = "mp4"
elif mimetype.startswith("video/x-flv"):
video_format = "flv"
elif mimetype.startswith("video/3gpp"):
video_format = "3gp"
elif mimetype.startswith("video/webm"):
video_format = "webm"
else:
video_format = "unknown"
if quality == "small":
video_quality = "240p"
video_priority = 5
elif quality == "medium":
video_quality = "360p"
video_priority = 4
elif quality == "large":
video_quality = "480p"
video_priority = 3
elif quality == "hd720":
video_quality = "720p"
video_priority = 2
elif quality == "hd1080":
video_quality = "1080p"
video_priority = 1
else:
video_quality = "unknown"
stream_dict = {
'url' : video_url,
'quality' : video_quality,
'priority' : video_priority,
'format' : video_format
}
stream_pool.append(stream_dict) for stream in streams:
fields = urlparse.parse_qs(stream)
try: if len(fields) < 6:
video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1)) self.state = "failed"
except: raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.")
raise ResolverError("Could not find the video title.")
signature = fields['sig'][0]
video_url = "%s&signature=%s" % (fields['url'][0], signature)
quality = fields['quality'][0]
fallback_host = fields['fallback_host'][0]
mimetype = fields['type'][0]
itag = fields['itag'][0]
if mimetype.startswith("video/mp4"):
video_format = "mp4"
elif mimetype.startswith("video/x-flv"):
video_format = "flv"
elif mimetype.startswith("video/3gpp"):
video_format = "3gp"
elif mimetype.startswith("video/webm"):
video_format = "webm"
else:
video_format = "unknown"
if quality == "small":
video_quality = "240p"
video_priority = 5
elif quality == "medium":
video_quality = "360p"
video_priority = 4
elif quality == "large":
video_quality = "480p"
video_priority = 3
elif quality == "hd720":
video_quality = "720p"
video_priority = 2
elif quality == "hd1080":
video_quality = "1080p"
video_priority = 1
else:
video_quality = "unknown"
video_priority = 0
print "UNKNOWN: %s" % quality
stream_dict = {
'url' : video_url,
'quality' : video_quality,
'priority' : video_priority,
'format' : video_format,
'extra' : {
'itag': itag,
'mimetype': mimetype,
'fallback_host': fallback_host
}
}
stream_pool.append(stream_dict)
try:
video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1))
except:
self.state = "failed"
raise ResolverError("Could not find the video title.")
self.results = {
'title': video_title,
'videos': stream_pool
}
return { 'title': video_title, 'videos': stream_pool } self.state = "finished"
return self

@ -1,4 +1,5 @@
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
import cookielib, urllib2
import sys import sys
reload(sys) reload(sys)
@ -11,5 +12,45 @@ class ResolverError(Exception):
def __str__(self): def __str__(self):
return repr(self.val) return repr(self.val)
class Task():
    """Base class for one URL-resolution job.

    An instance owns its own cookie jar and URL opener, so requests made
    through fetch_page() share cookies within the task. Subclasses
    override run() and may override result_type and extra_headers.
    """

    # Not used by the visible code; presumably holds a Captcha when a
    # site demands one -- TODO confirm against the resolvers.
    captcha = None
    # Replaced with a fresh cookielib.CookieJar in __init__.
    cookiejar = None
    # Sent as the User-agent header on every request made by the opener.
    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11"
    # Replaced with a cookie-aware urllib2 opener in __init__.
    opener = None
    # Output of run(); stays None until run() stores something.
    results = None
    # Lifecycle marker; "none" until run() changes it.
    state = "none"
    url = ""
    # Label describing what kind of data `results` holds.
    result_type = "none"
    # Additional request headers. Treated as read-only at class level:
    # __init__ copies it instead of modifying it in place.
    extra_headers = {}

    def __init__(self, url):
        """Prepare a cookie-aware opener and remember the URL to resolve.

        url -- the address this task operates on.
        """
        self.cookiejar = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))

        # Work on a per-instance copy. Writing the User-agent entry into
        # the class-level dict would silently change the headers of every
        # other task class sharing that same dict object.
        headers = dict(self.extra_headers)
        headers['User-agent'] = self.useragent
        self.extra_headers = headers

        # Replace the opener's default header list with ours.
        self.opener.addheaders = list(headers.items())

        self.url = url

    def run(self):
        """Default implementation: hand the URL back unchanged.

        Sets state to "finished", stores the URL in results, and returns
        the task itself.
        """
        self.state = "finished"
        self.results = self.url
        return self

    def fetch_page(self, url):
        """Download `url` through this task's opener and return the body.

        Errors raised by the opener are propagated to the caller.
        """
        response = self.opener.open(url)
        try:
            return response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
class Captcha():
    """Simple holder for the image and audio parts of a CAPTCHA.

    What the two fields contain (URLs, raw data, ...) is not established
    by the visible code -- TODO confirm with the code that creates them.
    """

    image = ""
    audio = ""

    def __init__(self, image="", audio=""):
        """Store the supplied image and audio values on the instance.

        The `self` parameter was previously missing, which made every
        attempt to instantiate the class fail.
        """
        self.image = image
        self.audio = audio
def unescape(s): def unescape(s):
return HTMLParser.unescape.__func__(HTMLParser, s) return HTMLParser.unescape.__func__(HTMLParser, s)

Loading…
Cancel
Save