Update the shared code and all resolvers for the new class-based model

master
Sven Slootweg 12 years ago
parent 92bceed168
commit 88e6819bf2

@ -1,21 +1,28 @@
import re import re
from resolvers import * import resolvers
def resolve(url): def resolve(url):
if re.match("https?:\/\/(www\.)?putlocker\.com", url) is not None: if re.match("https?:\/\/(www\.)?putlocker\.com", url) is not None:
return putlocker.resolve(url) task = resolvers.PutlockerTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?sockshare\.com", url) is not None: elif re.match("https?:\/\/(www\.)?sockshare\.com", url) is not None:
return sockshare.resolve(url) task = resolvers.SockshareTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?1channel\.ch\/external\.php", url) is not None: elif re.match("https?:\/\/(www\.)?1channel\.ch\/external\.php", url) is not None:
return onechannel.resolve(url) task = resolvers.OneChannelTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?youtube\.com\/watch\?", url) is not None: elif re.match("https?:\/\/(www\.)?youtube\.com\/watch\?", url) is not None:
return youtube.resolve(url) task = resolvers.YoutubeTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?filebox\.com\/[a-zA-Z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?filebox\.com\/[a-zA-Z0-9]+", url) is not None:
return filebox.resolve(url) task = resolvers.FileboxTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?pastebin\.com\/[a-zA-Z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?pastebin\.com\/[a-zA-Z0-9]+", url) is not None:
return pastebin.resolve(url) task = resolvers.PastebinTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?mediafire\.com\/\?[a-z0-9]+", url) is not None: elif re.match("https?:\/\/(www\.)?mediafire\.com\/\?[a-z0-9]+", url) is not None:
return mediafire.resolve(url) task = resolvers.MediafireTask(url)
return task.run()
else: else:
return {} return {}
@ -25,10 +32,10 @@ def recurse(url):
while True: while True:
result = resolve(url) result = resolve(url)
if result == {}: if result.state != "finished":
return previous_result return previous_result
elif 'url' not in result: elif result.result_type != "url":
return result return result
url = result['url'] url = result.results['url']
previous_result = result previous_result = result

@ -1,2 +1,9 @@
def resolve(input): from resolv.shared import Task
return {'dummy': input}
class DummyTask(Task):
    """Placeholder task that performs no lookup and echoes its URL back."""

    result_type = "dummy"

    def run(self):
        """Store the task's URL under the 'dummy' key and mark it finished.

        Returns the task itself, so callers can read `state` and `results`
        from the returned object.
        """
        payload = {}
        payload['dummy'] = self.url
        self.state = "finished"
        self.results = payload
        return self

@ -1,23 +1,29 @@
import re, time, urllib2 import re, time, urllib2
from resolv.shared import ResolverError from resolv.shared import ResolverError, Task
def resolve(url): class FileboxTask(Task):
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url) result_type = "video"
def run(self):
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", self.url)
if matches is None: if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid Filebox.com URL.") raise ResolverError("The provided URL is not a valid Filebox.com URL.")
video_id = matches.group(2) video_id = matches.group(2)
try: try:
contents = urllib2.urlopen("http://www.filebox.com/embed-%s-970x543.html" % video_id).read() contents = self.fetch_page("http://www.filebox.com/embed-%s-970x543.html" % video_id)
except: except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the video page.") raise ResolverError("Could not retrieve the video page.")
matches = re.search("url: '([^']+)',", contents) matches = re.search("url: '([^']+)',", contents)
if matches is None: if matches is None:
raise ResolverError("No video was found on the specified URL.") self.state = "invalid"
raise ResolverError("No video was found on the specified URL. The Filebox.com resolver currently only supports videos.")
video_file = matches.group(1) video_file = matches.group(1)
@ -28,43 +34,10 @@ def resolve(url):
'format' : "unknown" 'format' : "unknown"
} }
return { 'title': "", 'videos': [stream_dict] } self.results = {
'title': "",
def resolve2(url): 'videos': [stream_dict]
# This is a fallback function in case no video could be found through the resolve() method. }
# It's not recommended to use it, as it introduces a 5 second wait.
try:
import mechanize
except ImportError:
raise ResolverError("The Python mechanize module is required to resolve Filebox.com URLs.")
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url)
if matches is None:
raise ResolverError("The provided URL is not a valid Filebox.com URL.")
try:
browser = mechanize.Browser()
browser.set_handle_robots(False)
browser.open(url)
except:
raise ResolverError("The Filebox.com site could not be reached.")
time.sleep(6)
try:
browser.select_form(nr=0)
result = browser.submit()
page = result.read()
except Exception, e:
raise ResolverError("The file was removed, or the URL is incorrect.")
matches = re.search("this\.play\('([^']+)'\)", page)
if matches is None:
raise ResolverError("No video file was found on the given URL; the Filebox.com server for this file may be in maintenance mode, or the given URL may not be a video file. The Filebox.com resolver currently only supports video links.")
video_file = matches.group(1)
return { 'title': "", 'videos': { 'video': video_file } } self.state = "finished"
return self

@ -1,15 +1,20 @@
import re, urllib2 import re, urllib2
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class MediafireTask(Task):
result_type = "file"
def run(self):
try: try:
contents = urllib2.urlopen(url).read() contents = self.fetch_page(self.url)
except: except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the specified URL.") raise ResolverError("Could not retrieve the specified URL.")
matches = re.search('kNO = "([^"]+)";', contents) matches = re.search('kNO = "([^"]+)";', contents)
if matches is None: if matches is None:
self.state = "failed"
raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.") raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.")
file_url = matches.group(1) file_url = matches.group(1)
@ -17,6 +22,7 @@ def resolve(url):
try: try:
file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1)) file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1))
except: except:
self.state = "failed"
raise ResolverError("Could not find the download title.") raise ResolverError("Could not find the download title.")
file_dict = { file_dict = {
@ -25,4 +31,10 @@ def resolve(url):
'format' : "unknown" 'format' : "unknown"
} }
return { 'title': file_title, 'files': [file_dict] } self.results = {
'title': file_title,
'files': [file_dict]
}
self.state = "finished"
return self

@ -1,15 +1,22 @@
import re, base64 import re, base64
from resolv.shared import ResolverError from resolv.shared import ResolverError, Task
def resolve(url): class OneChannelTask(Task):
matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", url) result_type = "url"
def run(self):
matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", self.url)
if matches is None: if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid external 1channel URL.") raise ResolverError("The provided URL is not a valid external 1channel URL.")
try: try:
real_url = base64.b64decode(matches.group(2)).strip() real_url = base64.b64decode(matches.group(2)).strip()
except TypeError: except TypeError:
self.state = "failed"
raise ResolverError("The provided URL is malformed.") raise ResolverError("The provided URL is malformed.")
return { 'url': real_url } self.results = { 'url': real_url }
self.state = "finished"
return self

@ -1,30 +1,42 @@
import re, urllib, urllib2 import re, urllib2
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class PastebinTask(Task):
matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", url) result_type = "text"
def run(self):
matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", self.url)
if matches is None: if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid Pastebin URL.") raise ResolverError("The provided URL is not a valid Pastebin URL.")
paste_id = matches.group(2) paste_id = matches.group(2)
try: try:
contents = urllib2.urlopen(url).read() contents = self.fetch_page(self.url)
except: except urllib2.URLError, e:
raise ResolverError("Could not retrieve the specified URL. The specified paste may not exist.") self.state = "failed"
raise ResolverError("Could not retrieve the specified URL. The paste may not exist.")
matches = re.search("<h1>([^<]+)</h1>", contents) matches = re.search("<h1>([^<]+)</h1>", contents)
if matches is None: if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid paste.") raise ResolverError("The provided URL is not a valid paste.")
paste_title = unescape(matches.group(1)) paste_title = unescape(matches.group(1))
file_dict = { resolved = {
'url' : "http://pastebin.com/download.php?i=%s" % paste_id, 'url' : "http://pastebin.com/download.php?i=%s" % paste_id,
'priority' : 1, 'priority' : 1,
'format' : "text" 'format' : "text"
} }
return { 'title': paste_title, 'files': [file_dict] } self.results = {
'title': paste_title,
'files': [resolved]
}
self.state = "finished"
return self

@ -1,15 +1,20 @@
import re import re
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class PutlockerTask(Task):
result_type = "video"
def run(self):
try: try:
import mechanize import mechanize
except ImportError: except ImportError:
self.state = "failed"
raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.") raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.")
matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", url) matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", self.url)
if matches is None: if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid PutLocker URL.") raise ResolverError("The provided URL is not a valid PutLocker URL.")
video_id = matches.group(3) video_id = matches.group(3)
@ -19,6 +24,7 @@ def resolve(url):
browser.set_handle_robots(False) browser.set_handle_robots(False)
browser.open("http://putlocker.com/embed/%s" % video_id) browser.open("http://putlocker.com/embed/%s" % video_id)
except: except:
self.state = "failed"
raise ResolverError("The PutLocker site could not be reached.") raise ResolverError("The PutLocker site could not be reached.")
try: try:
@ -26,6 +32,7 @@ def resolve(url):
result = browser.submit() result = browser.submit()
page = result.read() page = result.read()
except Exception, e: except Exception, e:
self.state = "nonexistent"
raise ResolverError("The file was removed, or the URL is incorrect.") raise ResolverError("The file was removed, or the URL is incorrect.")
matches = re.search("playlist: '([^']+)'", page) matches = re.search("playlist: '([^']+)'", page)
@ -38,11 +45,13 @@ def resolve(url):
try: try:
browser.open("http://www.putlocker.com%s" % playlist) browser.open("http://www.putlocker.com%s" % playlist)
except: except:
self.state = "failed"
raise ResolverError("The playlist file for the given URL could not be loaded.") raise ResolverError("The playlist file for the given URL could not be loaded.")
matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read())
if matches is None: if matches is None:
self.state = "failed"
raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.") raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.")
video_file = matches.group(1) video_file = matches.group(1)
@ -50,6 +59,7 @@ def resolve(url):
try: try:
video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1))
except: except:
self.state = "failed"
raise ResolverError("Could not find the video title.") raise ResolverError("Could not find the video title.")
stream_dict = { stream_dict = {
@ -59,4 +69,10 @@ def resolve(url):
'format' : "unknown" 'format' : "unknown"
} }
return { 'title': video_title, 'videos': [stream_dict] } self.results = {
'title': video_title,
'videos': [stream_dict]
}
self.state = "finished"
return self

@ -1,16 +1,21 @@
import re import re
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class SockshareTask(Task):
result_type = "video"
def run(self):
try: try:
import mechanize import mechanize
except ImportError: except ImportError:
raise ResolverError("The Python mechanize module is required to resolve SockShare URLs.") self.state = "failed"
raise ResolverError("The Python mechanize module is required to resolve Sockshare URLs.")
matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", url) matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", self.url)
if matches is None: if matches is None:
raise ResolverError("The provided URL is not a valid SockShare URL.") self.state = "invalid"
raise ResolverError("The provided URL is not a valid Sockshare URL.")
video_id = matches.group(3) video_id = matches.group(3)
@ -19,37 +24,42 @@ def resolve(url):
browser.set_handle_robots(False) browser.set_handle_robots(False)
browser.open("http://sockshare.com/embed/%s" % video_id) browser.open("http://sockshare.com/embed/%s" % video_id)
except: except:
raise ResolverError("The SockShare site could not be reached.") self.state = "failed"
raise ResolverError("The Sockshare site could not be reached.")
try: try:
browser.select_form(nr=0) browser.select_form(nr=0)
result = browser.submit() result = browser.submit()
page = result.read() page = result.read()
except Exception, e: except Exception, e:
self.state = "nonexistent"
raise ResolverError("The file was removed, or the URL is incorrect.") raise ResolverError("The file was removed, or the URL is incorrect.")
matches = re.search("playlist: '([^']+)'", page) matches = re.search("playlist: '([^']+)'", page)
if matches is None: if matches is None:
raise ResolverError("No playlist was found on the given URL; the SockShare server for this file may be in maintenance mode, or the given URL may not be a video file. The SockShare resolver currently only supports video links.") raise ResolverError("No playlist was found on the given URL; the Sockshare server for this file may be in maintenance mode, or the given URL may not be a video file. The Sockshare resolver currently only supports video links.")
playlist = matches.group(1) playlist = matches.group(1)
try: try:
browser.open("http://www.sockshare.com%s" % playlist) browser.open("http://www.sockshare.com%s" % playlist)
except: except:
self.state = "failed"
raise ResolverError("The playlist file for the given URL could not be loaded.") raise ResolverError("The playlist file for the given URL could not be loaded.")
matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read())
if matches is None: if matches is None:
raise ResolverError("The playlist file does not contain any video URLs. The SockShare resolver currently only supports video links.") self.state = "failed"
raise ResolverError("The playlist file does not contain any video URLs. The Sockshare resolver currently only supports video links.")
video_file = matches.group(1) video_file = matches.group(1)
try: try:
video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1))
except: except:
self.state = "failed"
raise ResolverError("Could not find the video title.") raise ResolverError("Could not find the video title.")
stream_dict = { stream_dict = {
@ -59,4 +69,10 @@ def resolve(url):
'format' : "unknown" 'format' : "unknown"
} }
return { 'title': video_title, 'videos': [stream_dict] } self.results = {
'title': video_title,
'videos': [stream_dict]
}
self.state = "finished"
return self

@ -1,46 +1,61 @@
import re, urllib, urllib2 import re, urllib, urllib2, urlparse
from resolv.shared import ResolverError, unescape from resolv.shared import ResolverError, unescape, Task
def resolve(url): class YoutubeTask(Task):
result_type = "video"
extra_headers = {
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us,en;q=0.5'
}
def run(self):
try: try:
contents = urllib2.urlopen(url).read() contents = self.fetch_page(self.url)
except: except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the specified URL.") raise ResolverError("Could not retrieve the specified URL.")
map_start = "url_encoded_fmt_stream_map=" map_start = "url_encoded_fmt_stream_map="
map_end = "\\u0026amp;" map_end = "\\u0026amp;"
try: try:
pos_start = contents.index(map_start) + len(map_start) + 6 pos_start = contents.index(map_start) + len(map_start)
snippet = contents[pos_start:] snippet = contents[pos_start:]
except ValueError: except ValueError:
self.state = "failed"
raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?") raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?")
try: try:
pos_end = snippet.index(map_end) pos_end = snippet.index(map_end)
stream_map = snippet[:pos_end] stream_map = snippet[:pos_end]
except ValueError: except ValueError:
self.state = "failed"
raise ResolverError("The ending position for the YouTube player configuration could not be found.") raise ResolverError("The ending position for the YouTube player configuration could not be found.")
try: try:
stream_map = urllib.unquote(stream_map) stream_map = urllib.unquote(stream_map)
streams = stream_map.split(',url=') streams = stream_map.split(',')
except: except:
self.state = "failed"
raise ResolverError("The YouTube player configuration is corrupted.") raise ResolverError("The YouTube player configuration is corrupted.")
stream_pool = [] stream_pool = []
for stream in streams: for stream in streams:
fields = stream.split('&') fields = urlparse.parse_qs(stream)
if len(fields) < 5: if len(fields) < 6:
self.state = "failed"
raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.") raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.")
video_url = urllib.unquote(fields[0]) signature = fields['sig'][0]
quality = fields[1].split("=")[1] video_url = "%s&signature=%s" % (fields['url'][0], signature)
fallback_host = fields[2].split("=")[1] quality = fields['quality'][0]
mimetype = urllib.unquote(fields[3].split("=")[1]) fallback_host = fields['fallback_host'][0]
itag = fields[4].split("=", 2)[1] mimetype = fields['type'][0]
itag = fields['itag'][0]
if mimetype.startswith("video/mp4"): if mimetype.startswith("video/mp4"):
video_format = "mp4" video_format = "mp4"
@ -70,12 +85,19 @@ def resolve(url):
video_priority = 1 video_priority = 1
else: else:
video_quality = "unknown" video_quality = "unknown"
video_priority = 0
print "UNKNOWN: %s" % quality
stream_dict = { stream_dict = {
'url' : video_url, 'url' : video_url,
'quality' : video_quality, 'quality' : video_quality,
'priority' : video_priority, 'priority' : video_priority,
'format' : video_format 'format' : video_format,
'extra' : {
'itag': itag,
'mimetype': mimetype,
'fallback_host': fallback_host
}
} }
stream_pool.append(stream_dict) stream_pool.append(stream_dict)
@ -83,6 +105,13 @@ def resolve(url):
try: try:
video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1)) video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1))
except: except:
self.state = "failed"
raise ResolverError("Could not find the video title.") raise ResolverError("Could not find the video title.")
return { 'title': video_title, 'videos': stream_pool } self.results = {
'title': video_title,
'videos': stream_pool
}
self.state = "finished"
return self

@ -1,4 +1,5 @@
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
import cookielib, urllib2
import sys import sys
reload(sys) reload(sys)
@ -11,5 +12,45 @@ class ResolverError(Exception):
def __str__(self): def __str__(self):
return repr(self.val) return repr(self.val)
class Task():
    """Base class for one URL-resolution job.

    A task owns its own cookie jar and URL opener, so cookies received while
    fetching pages are kept per task. Subclasses override `result_type`,
    optionally `extra_headers`, and `run()`.
    """

    captcha = None
    cookiejar = None
    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11"
    opener = None
    results = None
    state = "none"
    url = ""
    result_type = "none"
    # Class-level default headers; subclasses may replace this dict.
    # It is never mutated in place (see __init__).
    extra_headers = {}

    def __init__(self, url):
        """Prepare a cookie-aware opener and remember the URL to resolve.

        url -- the URL this task operates on; stored as `self.url`.
        """
        self.cookiejar = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))

        # Work on a per-instance copy: writing the User-agent straight into
        # self.extra_headers would modify the dict shared by the class (and
        # by every subclass that inherits it).
        headers = dict(self.extra_headers)
        headers['User-agent'] = self.useragent
        self.extra_headers = headers

        # Replace the opener's default header list with ours.
        self.opener.addheaders = list(headers.items())

        self.url = url

    def run(self):
        """Default behaviour: report the URL itself as the result.

        Sets `state` to "finished", stores the URL in `results`, and returns
        the task.
        """
        self.state = "finished"
        self.results = self.url
        return self

    def fetch_page(self, url):
        """Open `url` through this task's opener and return the response body.

        Errors raised by the opener are not caught here.
        """
        return self.opener.open(url).read()
class Captcha():
    """Simple holder for the image and audio parts of a captcha.

    Both values are stored exactly as given; this class does not interpret
    them.
    """

    image = ""
    audio = ""

    def __init__(self, image="", audio=""):
        """Store the supplied image and audio values on the instance.

        image -- image representation of the captcha (defaults to "").
        audio -- audio representation of the captcha (defaults to "").
        """
        # `self` was missing from the original signature, which made the
        # class impossible to instantiate.
        self.image = image
        self.audio = audio
def unescape(s): def unescape(s):
return HTMLParser.unescape.__func__(HTMLParser, s) return HTMLParser.unescape.__func__(HTMLParser, s)

Loading…
Cancel
Save