Update the shared code and all resolvers for the new class-based model

master
Sven Slootweg 12 years ago
parent 92bceed168
commit 88e6819bf2

@ -1,21 +1,28 @@
import re
from resolvers import *
import resolvers
def resolve(url):
if re.match("https?:\/\/(www\.)?putlocker\.com", url) is not None:
return putlocker.resolve(url)
task = resolvers.PutlockerTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?sockshare\.com", url) is not None:
return sockshare.resolve(url)
task = resolvers.SockshareTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?1channel\.ch\/external\.php", url) is not None:
return onechannel.resolve(url)
task = resolvers.OneChannelTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?youtube\.com\/watch\?", url) is not None:
return youtube.resolve(url)
task = resolvers.YoutubeTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?filebox\.com\/[a-zA-Z0-9]+", url) is not None:
return filebox.resolve(url)
task = resolvers.FileboxTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?pastebin\.com\/[a-zA-Z0-9]+", url) is not None:
return pastebin.resolve(url)
task = resolvers.PastebinTask(url)
return task.run()
elif re.match("https?:\/\/(www\.)?mediafire\.com\/\?[a-z0-9]+", url) is not None:
return mediafire.resolve(url)
task = resolvers.MediafireTask(url)
return task.run()
else:
return {}
@ -25,10 +32,10 @@ def recurse(url):
while True:
result = resolve(url)
if result == {}:
if result.state != "finished":
return previous_result
elif 'url' not in result:
elif result.result_type != "url":
return result
url = result['url']
url = result.results['url']
previous_result = result

@ -1,2 +1,9 @@
def resolve(input):
    """Return a one-entry dict holding *input* under the 'dummy' key."""
    wrapped = {}
    wrapped['dummy'] = input
    return wrapped
from resolv.shared import Task
class DummyTask(Task):
    """Task that performs no lookup and simply echoes its URL back."""

    result_type = "dummy"

    def run(self):
        """Store the task URL under the 'dummy' key, mark the task finished, return self."""
        echoed = dict(dummy=self.url)
        self.results = echoed
        self.state = "finished"
        return self

@ -1,70 +1,43 @@
import re, time, urllib2
from resolv.shared import ResolverError
from resolv.shared import ResolverError, Task
def resolve(url):
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url)
class FileboxTask(Task):
result_type = "video"
if matches is None:
raise ResolverError("The provided URL is not a valid Filebox.com URL.")
def run(self):
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", self.url)
video_id = matches.group(2)
if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid Filebox.com URL.")
try:
contents = urllib2.urlopen("http://www.filebox.com/embed-%s-970x543.html" % video_id).read()
except:
raise ResolverError("Could not retrieve the video page.")
video_id = matches.group(2)
matches = re.search("url: '([^']+)',", contents)
try:
contents = self.fetch_page("http://www.filebox.com/embed-%s-970x543.html" % video_id)
except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the video page.")
if matches is None:
raise ResolverError("No video was found on the specified URL.")
matches = re.search("url: '([^']+)',", contents)
video_file = matches.group(1)
if matches is None:
self.state = "invalid"
raise ResolverError("No video was found on the specified URL. The Filebox.com resolver currently only supports videos.")
stream_dict = {
'url' : video_file,
'quality' : "unknown",
'priority' : 1,
'format' : "unknown"
}
video_file = matches.group(1)
return { 'title': "", 'videos': [stream_dict] }
stream_dict = {
'url' : video_file,
'quality' : "unknown",
'priority' : 1,
'format' : "unknown"
}
def resolve2(url):
# This is a fallback function in case no video could be found through the resolve() method.
# It's not recommended to use it, as it introduces a 5 second wait.
self.results = {
'title': "",
'videos': [stream_dict]
}
try:
import mechanize
except ImportError:
raise ResolverError("The Python mechanize module is required to resolve Filebox.com URLs.")
matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url)
if matches is None:
raise ResolverError("The provided URL is not a valid Filebox.com URL.")
try:
browser = mechanize.Browser()
browser.set_handle_robots(False)
browser.open(url)
except:
raise ResolverError("The Filebox.com site could not be reached.")
time.sleep(6)
try:
browser.select_form(nr=0)
result = browser.submit()
page = result.read()
except Exception, e:
raise ResolverError("The file was removed, or the URL is incorrect.")
matches = re.search("this\.play\('([^']+)'\)", page)
if matches is None:
raise ResolverError("No video file was found on the given URL; the Filebox.com server for this file may be in maintenance mode, or the given URL may not be a video file. The Filebox.com resolver currently only supports video links.")
video_file = matches.group(1)
return { 'title': "", 'videos': { 'video': video_file } }
self.state = "finished"
return self

@ -1,28 +1,40 @@
import re, urllib2
from resolv.shared import ResolverError, unescape
from resolv.shared import ResolverError, unescape, Task
def resolve(url):
try:
contents = urllib2.urlopen(url).read()
except:
raise ResolverError("Could not retrieve the specified URL.")
class MediafireTask(Task):
result_type = "file"
matches = re.search('kNO = "([^"]+)";', contents)
def run(self):
try:
contents = self.fetch_page(self.url)
except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the specified URL.")
if matches is None:
raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.")
matches = re.search('kNO = "([^"]+)";', contents)
file_url = matches.group(1)
if matches is None:
self.state = "failed"
raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.")
try:
file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1))
except:
raise ResolverError("Could not find the download title.")
file_url = matches.group(1)
file_dict = {
'url' : file_url,
'priority' : 1,
'format' : "unknown"
}
try:
file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1))
except:
self.state = "failed"
raise ResolverError("Could not find the download title.")
return { 'title': file_title, 'files': [file_dict] }
file_dict = {
'url' : file_url,
'priority' : 1,
'format' : "unknown"
}
self.results = {
'title': file_title,
'files': [file_dict]
}
self.state = "finished"
return self

@ -1,15 +1,22 @@
import re, base64
from resolv.shared import ResolverError
from resolv.shared import ResolverError, Task
def resolve(url):
matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", url)
class OneChannelTask(Task):
result_type = "url"
if matches is None:
raise ResolverError("The provided URL is not a valid external 1channel URL.")
def run(self):
matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", self.url)
try:
real_url = base64.b64decode(matches.group(2)).strip()
except TypeError:
raise ResolverError("The provided URL is malformed.")
if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid external 1channel URL.")
return { 'url': real_url }
try:
real_url = base64.b64decode(matches.group(2)).strip()
except TypeError:
self.state = "failed"
raise ResolverError("The provided URL is malformed.")
self.results = { 'url': real_url }
self.state = "finished"
return self

@ -1,30 +1,42 @@
import re, urllib, urllib2
from resolv.shared import ResolverError, unescape
import re, urllib2
from resolv.shared import ResolverError, unescape, Task
def resolve(url):
matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", url)
class PastebinTask(Task):
result_type = "text"
if matches is None:
raise ResolverError("The provided URL is not a valid Pastebin URL.")
def run(self):
matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", self.url)
paste_id = matches.group(2)
if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid Pastebin URL.")
try:
contents = urllib2.urlopen(url).read()
except:
raise ResolverError("Could not retrieve the specified URL. The specified paste may not exist.")
paste_id = matches.group(2)
matches = re.search("<h1>([^<]+)</h1>", contents)
try:
contents = self.fetch_page(self.url)
except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the specified URL. The paste may not exist.")
if matches is None:
raise ResolverError("The provided URL is not a valid paste.")
matches = re.search("<h1>([^<]+)</h1>", contents)
paste_title = unescape(matches.group(1))
if matches is None:
self.state = "invalid"
raise ResolverError("The provided URL is not a valid paste.")
file_dict = {
'url' : "http://pastebin.com/download.php?i=%s" % paste_id,
'priority' : 1,
'format' : "text"
}
paste_title = unescape(matches.group(1))
return { 'title': paste_title, 'files': [file_dict] }
resolved = {
'url' : "http://pastebin.com/download.php?i=%s" % paste_id,
'priority' : 1,
'format' : "text"
}
self.results = {
'title': paste_title,
'files': [resolved]
}
self.state = "finished"
return self

@ -1,62 +1,78 @@
import re
from resolv.shared import ResolverError, unescape
def resolve(url):
    """Resolve a PutLocker file/embed URL to its title and video stream.

    Opens the embed page for the file ID found in *url*, submits the first
    form on it, reads the playlist path out of the returned page, loads that
    playlist and extracts the FLV URL from it.

    Returns a dict with 'title' and 'videos' (a one-element list holding the
    stream dict). Every failure path raises ResolverError.
    """
    # Imported lazily so that a missing module is reported as a ResolverError.
    try:
        import mechanize
    except ImportError:
        raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.")

    url_match = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", url)
    if url_match is None:
        raise ResolverError("The provided URL is not a valid PutLocker URL.")
    embed_url = "http://putlocker.com/embed/%s" % url_match.group(3)

    try:
        browser = mechanize.Browser()
        browser.set_handle_robots(False)
        browser.open(embed_url)
    except:
        raise ResolverError("The PutLocker site could not be reached.")

    # Submitting the first form on the embed page yields the page searched below.
    try:
        browser.select_form(nr=0)
        page = browser.submit().read()
    except Exception:
        raise ResolverError("The file was removed, or the URL is incorrect.")

    playlist_match = re.search("playlist: '([^']+)'", page)
    if playlist_match is None:
        raise ResolverError("No playlist was found on the given URL; the PutLocker server for this file may be in maintenance mode, or the given URL may not be a video file. The PutLocker resolver currently only supports video links.")

    try:
        browser.open("http://www.putlocker.com%s" % playlist_match.group(1))
    except:
        raise ResolverError("The playlist file for the given URL could not be loaded.")

    playlist_body = browser.response().read()
    stream_match = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", playlist_body)
    if stream_match is None:
        raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.")

    # The title comes from the page returned by the form submit, not the playlist.
    try:
        title_match = re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page)
        video_title = unescape(title_match.group(1))
    except:
        raise ResolverError("Could not find the video title.")

    return {
        'title': video_title,
        'videos': [{
            'url': stream_match.group(1),
            'quality': "unknown",
            'priority': 1,
            'format': "unknown"
        }]
    }
from resolv.shared import ResolverError, unescape, Task
class PutlockerTask(Task):
    """Task that resolves a PutLocker file/embed URL to a video stream.

    ``run()`` records its outcome in ``self.state`` ("invalid", "nonexistent",
    "failed" or "finished") and, on success, stores a dict with 'title' and
    'videos' in ``self.results``.
    """

    result_type = "video"

    def run(self):
        """Resolve ``self.url`` and return this task.

        Raises:
            ResolverError: on any failure. ``self.state`` is always set to a
                non-"finished" value before the error is raised.
        """
        # Imported lazily so that a missing module is reported as a ResolverError.
        try:
            import mechanize
        except ImportError:
            self.state = "failed"
            raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.")

        matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", self.url)

        if matches is None:
            self.state = "invalid"
            raise ResolverError("The provided URL is not a valid PutLocker URL.")

        video_id = matches.group(3)

        # `except Exception` (rather than a bare except) keeps KeyboardInterrupt
        # and SystemExit from being reported as resolver failures.
        try:
            browser = mechanize.Browser()
            browser.set_handle_robots(False)
            browser.open("http://putlocker.com/embed/%s" % video_id)
        except Exception:
            self.state = "failed"
            raise ResolverError("The PutLocker site could not be reached.")

        # Submitting the first form on the embed page yields the page searched below.
        try:
            browser.select_form(nr=0)
            result = browser.submit()
            page = result.read()
        except Exception:
            self.state = "nonexistent"
            raise ResolverError("The file was removed, or the URL is incorrect.")

        matches = re.search("playlist: '([^']+)'", page)

        if matches is None:
            # Previously this path raised without recording a state, leaving
            # the task looking untouched after a failure.
            self.state = "failed"
            raise ResolverError("No playlist was found on the given URL; the PutLocker server for this file may be in maintenance mode, or the given URL may not be a video file. The PutLocker resolver currently only supports video links.")

        playlist = matches.group(1)

        try:
            browser.open("http://www.putlocker.com%s" % playlist)
        except Exception:
            self.state = "failed"
            raise ResolverError("The playlist file for the given URL could not be loaded.")

        matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read())

        if matches is None:
            self.state = "failed"
            raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.")

        video_file = matches.group(1)

        # The title comes from the page returned by the form submit, not the playlist.
        try:
            video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1))
        except Exception:
            self.state = "failed"
            raise ResolverError("Could not find the video title.")

        stream_dict = {
            'url' : video_file,
            'quality' : "unknown",
            'priority' : 1,
            'format' : "unknown"
        }

        self.results = {
            'title': video_title,
            'videos': [stream_dict]
        }

        self.state = "finished"
        return self

@ -1,62 +1,78 @@
import re
from resolv.shared import ResolverError, unescape
def resolve(url):
    """Resolve a SockShare file/embed URL to its title and video stream.

    Opens the embed page for the file ID found in *url*, submits the first
    form on it, reads the playlist path out of the returned page, loads that
    playlist and extracts the FLV URL from it.

    Returns a dict with 'title' and 'videos' (a one-element list holding the
    stream dict). Every failure path raises ResolverError.
    """
    # Imported lazily so that a missing module is reported as a ResolverError.
    try:
        import mechanize
    except ImportError:
        raise ResolverError("The Python mechanize module is required to resolve SockShare URLs.")

    url_match = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", url)
    if url_match is None:
        raise ResolverError("The provided URL is not a valid SockShare URL.")
    embed_url = "http://sockshare.com/embed/%s" % url_match.group(3)

    try:
        browser = mechanize.Browser()
        browser.set_handle_robots(False)
        browser.open(embed_url)
    except:
        raise ResolverError("The SockShare site could not be reached.")

    # Submitting the first form on the embed page yields the page searched below.
    try:
        browser.select_form(nr=0)
        page = browser.submit().read()
    except Exception:
        raise ResolverError("The file was removed, or the URL is incorrect.")

    playlist_match = re.search("playlist: '([^']+)'", page)
    if playlist_match is None:
        raise ResolverError("No playlist was found on the given URL; the SockShare server for this file may be in maintenance mode, or the given URL may not be a video file. The SockShare resolver currently only supports video links.")

    try:
        browser.open("http://www.sockshare.com%s" % playlist_match.group(1))
    except:
        raise ResolverError("The playlist file for the given URL could not be loaded.")

    playlist_body = browser.response().read()
    stream_match = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", playlist_body)
    if stream_match is None:
        raise ResolverError("The playlist file does not contain any video URLs. The SockShare resolver currently only supports video links.")

    # The title comes from the page returned by the form submit, not the playlist.
    try:
        title_match = re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page)
        video_title = unescape(title_match.group(1))
    except:
        raise ResolverError("Could not find the video title.")

    return {
        'title': video_title,
        'videos': [{
            'url': stream_match.group(1),
            'quality': "unknown",
            'priority': 1,
            'format': "unknown"
        }]
    }
from resolv.shared import ResolverError, unescape, Task
class SockshareTask(Task):
    """Task that resolves a Sockshare file/embed URL to a video stream.

    ``run()`` records its outcome in ``self.state`` ("invalid", "nonexistent",
    "failed" or "finished") and, on success, stores a dict with 'title' and
    'videos' in ``self.results``.
    """

    result_type = "video"

    def run(self):
        """Resolve ``self.url`` and return this task.

        Raises:
            ResolverError: on any failure. ``self.state`` is always set to a
                non-"finished" value before the error is raised.
        """
        # Imported lazily so that a missing module is reported as a ResolverError.
        try:
            import mechanize
        except ImportError:
            self.state = "failed"
            raise ResolverError("The Python mechanize module is required to resolve Sockshare URLs.")

        matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", self.url)

        if matches is None:
            self.state = "invalid"
            raise ResolverError("The provided URL is not a valid Sockshare URL.")

        video_id = matches.group(3)

        # `except Exception` (rather than a bare except) keeps KeyboardInterrupt
        # and SystemExit from being reported as resolver failures.
        try:
            browser = mechanize.Browser()
            browser.set_handle_robots(False)
            browser.open("http://sockshare.com/embed/%s" % video_id)
        except Exception:
            self.state = "failed"
            raise ResolverError("The Sockshare site could not be reached.")

        # Submitting the first form on the embed page yields the page searched below.
        try:
            browser.select_form(nr=0)
            result = browser.submit()
            page = result.read()
        except Exception:
            self.state = "nonexistent"
            raise ResolverError("The file was removed, or the URL is incorrect.")

        matches = re.search("playlist: '([^']+)'", page)

        if matches is None:
            # Previously this path raised without recording a state, leaving
            # the task looking untouched after a failure.
            self.state = "failed"
            raise ResolverError("No playlist was found on the given URL; the Sockshare server for this file may be in maintenance mode, or the given URL may not be a video file. The Sockshare resolver currently only supports video links.")

        playlist = matches.group(1)

        try:
            browser.open("http://www.sockshare.com%s" % playlist)
        except Exception:
            self.state = "failed"
            raise ResolverError("The playlist file for the given URL could not be loaded.")

        matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read())

        if matches is None:
            self.state = "failed"
            raise ResolverError("The playlist file does not contain any video URLs. The Sockshare resolver currently only supports video links.")

        video_file = matches.group(1)

        # The title comes from the page returned by the form submit, not the playlist.
        try:
            video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1))
        except Exception:
            self.state = "failed"
            raise ResolverError("Could not find the video title.")

        stream_dict = {
            'url' : video_file,
            'quality' : "unknown",
            'priority' : 1,
            'format' : "unknown"
        }

        self.results = {
            'title': video_title,
            'videos': [stream_dict]
        }

        self.state = "finished"
        return self

@ -1,88 +1,117 @@
import re, urllib, urllib2
from resolv.shared import ResolverError, unescape
def resolve(url):
try:
contents = urllib2.urlopen(url).read()
except:
raise ResolverError("Could not retrieve the specified URL.")
map_start = "url_encoded_fmt_stream_map="
map_end = "\\u0026amp;"
try:
pos_start = contents.index(map_start) + len(map_start) + 6
snippet = contents[pos_start:]
except ValueError:
raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?")
try:
pos_end = snippet.index(map_end)
stream_map = snippet[:pos_end]
except ValueError:
raise ResolverError("The ending position for the YouTube player configuration could not be found.")
try:
stream_map = urllib.unquote(stream_map)
streams = stream_map.split(',url=')
except:
raise ResolverError("The YouTube player configuration is corrupted.")
stream_pool = []
for stream in streams:
fields = stream.split('&')
if len(fields) < 5:
raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.")
video_url = urllib.unquote(fields[0])
quality = fields[1].split("=")[1]
fallback_host = fields[2].split("=")[1]
mimetype = urllib.unquote(fields[3].split("=")[1])
itag = fields[4].split("=", 2)[1]
if mimetype.startswith("video/mp4"):
video_format = "mp4"
elif mimetype.startswith("video/x-flv"):
video_format = "flv"
elif mimetype.startswith("video/3gpp"):
video_format = "3gp"
elif mimetype.startswith("video/webm"):
video_format = "webm"
else:
video_format = "unknown"
if quality == "small":
video_quality = "240p"
video_priority = 5
elif quality == "medium":
video_quality = "360p"
video_priority = 4
elif quality == "large":
video_quality = "480p"
video_priority = 3
elif quality == "hd720":
video_quality = "720p"
video_priority = 2
elif quality == "hd1080":
video_quality = "1080p"
video_priority = 1
else:
video_quality = "unknown"
stream_dict = {
'url' : video_url,
'quality' : video_quality,
'priority' : video_priority,
'format' : video_format
}
import re, urllib, urllib2, urlparse
from resolv.shared import ResolverError, unescape, Task
class YoutubeTask(Task):
result_type = "video"
extra_headers = {
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us,en;q=0.5'
}
def run(self):
try:
contents = self.fetch_page(self.url)
except urllib2.URLError, e:
self.state = "failed"
raise ResolverError("Could not retrieve the specified URL.")
map_start = "url_encoded_fmt_stream_map="
map_end = "\\u0026amp;"
try:
pos_start = contents.index(map_start) + len(map_start)
snippet = contents[pos_start:]
except ValueError:
self.state = "failed"
raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?")
try:
pos_end = snippet.index(map_end)
stream_map = snippet[:pos_end]
except ValueError:
self.state = "failed"
raise ResolverError("The ending position for the YouTube player configuration could not be found.")
try:
stream_map = urllib.unquote(stream_map)
streams = stream_map.split(',')
except:
self.state = "failed"
raise ResolverError("The YouTube player configuration is corrupted.")
stream_pool = []
stream_pool.append(stream_dict)
for stream in streams:
fields = urlparse.parse_qs(stream)
try:
video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1))
except:
raise ResolverError("Could not find the video title.")
if len(fields) < 6:
self.state = "failed"
raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.")
signature = fields['sig'][0]
video_url = "%s&signature=%s" % (fields['url'][0], signature)
quality = fields['quality'][0]
fallback_host = fields['fallback_host'][0]
mimetype = fields['type'][0]
itag = fields['itag'][0]
if mimetype.startswith("video/mp4"):
video_format = "mp4"
elif mimetype.startswith("video/x-flv"):
video_format = "flv"
elif mimetype.startswith("video/3gpp"):
video_format = "3gp"
elif mimetype.startswith("video/webm"):
video_format = "webm"
else:
video_format = "unknown"
if quality == "small":
video_quality = "240p"
video_priority = 5
elif quality == "medium":
video_quality = "360p"
video_priority = 4
elif quality == "large":
video_quality = "480p"
video_priority = 3
elif quality == "hd720":
video_quality = "720p"
video_priority = 2
elif quality == "hd1080":
video_quality = "1080p"
video_priority = 1
else:
video_quality = "unknown"
video_priority = 0
print "UNKNOWN: %s" % quality
stream_dict = {
'url' : video_url,
'quality' : video_quality,
'priority' : video_priority,
'format' : video_format,
'extra' : {
'itag': itag,
'mimetype': mimetype,
'fallback_host': fallback_host
}
}
stream_pool.append(stream_dict)
try:
video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1))
except:
self.state = "failed"
raise ResolverError("Could not find the video title.")
self.results = {
'title': video_title,
'videos': stream_pool
}
return { 'title': video_title, 'videos': stream_pool }
self.state = "finished"
return self

@ -1,4 +1,5 @@
from HTMLParser import HTMLParser
import cookielib, urllib2
import sys
reload(sys)
@ -11,5 +12,45 @@ class ResolverError(Exception):
def __str__(self):
return repr(self.val)
class Task():
    """Base class for one URL-resolution job.

    Each instance owns a cookie jar and a urllib2 opener that sends the
    configured user agent plus any ``extra_headers`` a subclass declares.
    Subclasses override ``run()`` and report their outcome through ``state``,
    ``results`` and ``result_type``.
    """

    captcha = None
    cookiejar = None
    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11"
    opener = None
    results = None
    state = "none"
    url = ""
    result_type = "none"
    extra_headers = {}

    def __init__(self, url):
        """Create the cookie-aware opener for this task and remember *url*."""
        self.cookiejar = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))

        # Work on a per-instance copy: writing User-agent straight into
        # self.extra_headers would mutate the class-level dict shared by
        # every task (and by every instance of a subclass that overrides it).
        self.extra_headers = dict(self.extra_headers)
        self.extra_headers['User-agent'] = self.useragent

        # Replace the opener's default header list with exactly our headers.
        self.opener.addheaders = list(self.extra_headers.items())

        self.url = url

    def run(self):
        """Default behaviour: mark the task finished with the URL itself as the result."""
        self.state = "finished"
        self.results = self.url
        return self

    def fetch_page(self, url):
        """Fetch *url* through this task's opener and return the response body.

        Errors raised by the opener propagate to the caller.
        """
        response = self.opener.open(url)
        try:
            return response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
class Captcha():
    """Holder for the image and audio parts of a CAPTCHA challenge.

    NOTE(review): whether ``image`` / ``audio`` carry URLs or raw data is not
    established by this class; confirm against the code that creates it.
    """

    image = ""
    audio = ""

    def __init__(self, image="", audio=""):
        """Store the given image and audio values on the instance.

        The original signature omitted ``self``, so the class could never be
        instantiated successfully.
        """
        self.image = image
        self.audio = audio
def unescape(s):
return HTMLParser.unescape.__func__(HTMLParser, s)

Loading…
Cancel
Save