From 88e6819bf2200ce0f1b3af65778001876a2937db Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Fri, 26 Oct 2012 17:51:27 +0200 Subject: [PATCH] Update the shared code and all resolvers for the new class-based model --- resolv/__init__.py | 29 ++++-- resolv/resolvers/dummy.py | 11 +- resolv/resolvers/filebox.py | 109 ++++++++----------- resolv/resolvers/mediafire.py | 62 ++++++----- resolv/resolvers/onechannel.py | 31 +++--- resolv/resolvers/pastebin.py | 70 +++++++------ resolv/resolvers/putlocker.py | 132 ++++++++++++----------- resolv/resolvers/sockshare.py | 132 ++++++++++++----------- resolv/resolvers/youtube.py | 185 +++++++++++++++++++-------------- resolv/shared.py | 41 ++++++++ 10 files changed, 461 insertions(+), 341 deletions(-) diff --git a/resolv/__init__.py b/resolv/__init__.py index 6c0ac33..aceb7e9 100644 --- a/resolv/__init__.py +++ b/resolv/__init__.py @@ -1,21 +1,28 @@ import re -from resolvers import * +import resolvers def resolve(url): if re.match("https?:\/\/(www\.)?putlocker\.com", url) is not None: - return putlocker.resolve(url) + task = resolvers.PutlockerTask(url) + return task.run() elif re.match("https?:\/\/(www\.)?sockshare\.com", url) is not None: - return sockshare.resolve(url) + task = resolvers.SockshareTask(url) + return task.run() elif re.match("https?:\/\/(www\.)?1channel\.ch\/external\.php", url) is not None: - return onechannel.resolve(url) + task = resolvers.OneChannelTask(url) + return task.run() elif re.match("https?:\/\/(www\.)?youtube\.com\/watch\?", url) is not None: - return youtube.resolve(url) + task = resolvers.YoutubeTask(url) + return task.run() elif re.match("https?:\/\/(www\.)?filebox\.com\/[a-zA-Z0-9]+", url) is not None: - return filebox.resolve(url) + task = resolvers.FileboxTask(url) + return task.run() elif re.match("https?:\/\/(www\.)?pastebin\.com\/[a-zA-Z0-9]+", url) is not None: - return pastebin.resolve(url) + task = resolvers.PastebinTask(url) + return task.run() elif re.match("https?:\/\/(www\.)?mediafire\.com\/\?[a-z0-9]+", url) is not None: - return mediafire.resolve(url) + task = resolvers.MediafireTask(url) + return task.run() else: return {} @@ -25,10 +32,10 @@ def recurse(url): while True: result = resolve(url) - if result == {}: + if result.state != "finished": return previous_result - elif 'url' not in result: + elif result.result_type != "url": return result - url = result['url'] + url = result.results['url'] previous_result = result diff --git a/resolv/resolvers/dummy.py b/resolv/resolvers/dummy.py index deeb57f..152a78c 100644 --- a/resolv/resolvers/dummy.py +++ b/resolv/resolvers/dummy.py @@ -1,2 +1,9 @@ -def resolve(input): - return {'dummy': input} +from resolv.shared import Task + +class DummyTask(Task): + result_type = "dummy" + + def run(self): + self.results = {'dummy': self.url} + self.state = "finished" + return self diff --git a/resolv/resolvers/filebox.py b/resolv/resolvers/filebox.py index c8bdff1..cc6c27a 100644 --- a/resolv/resolvers/filebox.py +++ b/resolv/resolvers/filebox.py @@ -1,70 +1,43 @@ import re, time, urllib2 -from resolv.shared import ResolverError +from resolv.shared import ResolverError, Task -def resolve(url): - matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url) - - if matches is None: - raise ResolverError("The provided URL is not a valid Filebox.com URL.") - - video_id = matches.group(2) - - try: - contents = urllib2.urlopen("http://www.filebox.com/embed-%s-970x543.html" % video_id).read() - except: - raise ResolverError("Could not retrieve the video page.") - - matches = re.search("url: '([^']+)',", contents) - - if matches is None: - raise ResolverError("No video was found on the specified URL.") - - video_file = matches.group(1) - - stream_dict = { - 'url' : video_file, - 'quality' : "unknown", - 'priority' : 1, - 'format' : "unknown" - } - - return { 'title': "", 'videos': [stream_dict] } - -def resolve2(url): - # This is a fallback function in case no video could be found through the resolve() method. - # It's not recommended to use it, as it introduces a 5 second wait. - - try: - import mechanize - except ImportError: - raise ResolverError("The Python mechanize module is required to resolve Filebox.com URLs.") - - matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", url) - - if matches is None: - raise ResolverError("The provided URL is not a valid Filebox.com URL.") - - try: - browser = mechanize.Browser() - browser.set_handle_robots(False) - browser.open(url) - except: - raise ResolverError("The Filebox.com site could not be reached.") - - time.sleep(6) - - try: - browser.select_form(nr=0) - result = browser.submit() - page = result.read() - except Exception, e: - raise ResolverError("The file was removed, or the URL is incorrect.") - - matches = re.search("this\.play\('([^']+)'\)", page) - - if matches is None: - raise ResolverError("No video file was found on the given URL; the Filebox.com server for this file may be in maintenance mode, or the given URL may not be a video file. The Filebox.com resolver currently only supports video links.") - - video_file = matches.group(1) - - return { 'title': "", 'videos': { 'video': video_file } } +class FileboxTask(Task): + result_type = "video" + + def run(self): + matches = re.search("https?:\/\/(www\.)?filebox\.com\/([a-zA-Z0-9]+)", self.url) + + if matches is None: + self.state = "invalid" + raise ResolverError("The provided URL is not a valid Filebox.com URL.") + + video_id = matches.group(2) + + try: + contents = self.fetch_page("http://www.filebox.com/embed-%s-970x543.html" % video_id) + except urllib2.URLError, e: + self.state = "failed" + raise ResolverError("Could not retrieve the video page.") + + matches = re.search("url: '([^']+)',", contents) + + if matches is None: + self.state = "invalid" + raise ResolverError("No video was found on the specified URL. The Filebox.com resolver currently only supports videos.") + + video_file = matches.group(1) + + stream_dict = { + 'url' : video_file, + 'quality' : "unknown", + 'priority' : 1, + 'format' : "unknown" + } + + self.results = { + 'title': "", + 'videos': [stream_dict] + } + + self.state = "finished" + return self diff --git a/resolv/resolvers/mediafire.py b/resolv/resolvers/mediafire.py index 1e3641a..a3452af 100644 --- a/resolv/resolvers/mediafire.py +++ b/resolv/resolvers/mediafire.py @@ -1,28 +1,40 @@ import re, urllib2 -from resolv.shared import ResolverError, unescape +from resolv.shared import ResolverError, unescape, Task -def resolve(url): - try: - contents = urllib2.urlopen(url).read() - except: - raise ResolverError("Could not retrieve the specified URL.") +class MediafireTask(Task): + result_type = "file" - matches = re.search('kNO = "([^"]+)";', contents) - - if matches is None: - raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.") - - file_url = matches.group(1) - - try: - file_title = unescape(re.search('([^<]+)<\/title>', contents).group(1)) - except: - raise ResolverError("Could not find the download title.") - - file_dict = { - 'url' : file_url, - 'priority' : 1, - 'format' : "unknown" - } - - return { 'title': file_title, 'files': [file_dict] } + def run(self): + try: + contents = self.fetch_page(self.url) + except urllib2.URLError, e: + self.state = "failed" + raise ResolverError("Could not retrieve the specified URL.") + + matches = re.search('kNO = "([^"]+)";', contents) + + if matches is None: + self.state = "failed" + raise ResolverError("No download was found on the given URL; the server for this file may be in maintenance mode, or the given URL may not be valid. It is also possible that you have been blocked - CAPTCHA support is not yet present.") + + file_url = matches.group(1) + + try: + file_title = unescape(re.search('<title>([^<]+)<\/title>', contents).group(1)) + except: + self.state = "failed" + raise ResolverError("Could not find the download title.") + + file_dict = { + 'url' : file_url, + 'priority' : 1, + 'format' : "unknown" + } + + self.results = { + 'title': file_title, + 'files': [file_dict] + } + + self.state = "finished" + return self diff --git a/resolv/resolvers/onechannel.py b/resolv/resolvers/onechannel.py index d94ed7d..068096e 100644 --- a/resolv/resolvers/onechannel.py +++ b/resolv/resolvers/onechannel.py @@ -1,15 +1,22 @@ import re, base64 -from resolv.shared import ResolverError +from resolv.shared import ResolverError, Task -def resolve(url): - matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", url) - - if matches is None: - raise ResolverError("The provided URL is not a valid external 1channel URL.") - - try: - real_url = base64.b64decode(matches.group(2)).strip() - except TypeError: - raise ResolverError("The provided URL is malformed.") +class OneChannelTask(Task): + result_type = "url" - return { 'url': real_url } + def run(self): + matches = re.search("https?:\/\/(www\.)?1channel\.ch\/external\.php\?.*url=([^&]+)", self.url) + + if matches is None: + self.state = "invalid" + raise ResolverError("The provided URL is not a valid external 1channel URL.") + + try: + real_url = base64.b64decode(matches.group(2)).strip() + except TypeError: + self.state = "failed" + raise ResolverError("The provided URL is malformed.") + + self.results = { 'url': real_url } + self.state = "finished" + return self diff --git a/resolv/resolvers/pastebin.py b/resolv/resolvers/pastebin.py index 4f2089f..061b24b 100644 --- a/resolv/resolvers/pastebin.py +++ b/resolv/resolvers/pastebin.py @@ -1,30 +1,42 @@ -import re, urllib, urllib2 -from resolv.shared import ResolverError, unescape +import re, urllib2 +from resolv.shared import ResolverError, unescape, Task -def resolve(url): - matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", url) - - if matches is None: - raise ResolverError("The provided URL is not a valid Pastebin URL.") - - paste_id = matches.group(2) - - try: - contents = urllib2.urlopen(url).read() - except: - raise ResolverError("Could not retrieve the specified URL. The specified paste may not exist.") - - matches = re.search("<h1>([^<]+)</h1>", contents) - - if matches is None: - raise ResolverError("The provided URL is not a valid paste.") - - paste_title = unescape(matches.group(1)) - - file_dict = { - 'url' : "http://pastebin.com/download.php?i=%s" % paste_id, - 'priority' : 1, - 'format' : "text" - } - - return { 'title': paste_title, 'files': [file_dict] } +class PastebinTask(Task): + result_type = "text" + + def run(self): + matches = re.search("https?:\/\/(www\.)?pastebin\.com\/([a-zA-Z0-9]+)", self.url) + + if matches is None: + self.state = "invalid" + raise ResolverError("The provided URL is not a valid Pastebin URL.") + + paste_id = matches.group(2) + + try: + contents = self.fetch_page(self.url) + except urllib2.URLError, e: + self.state = "failed" + raise ResolverError("Could not retrieve the specified URL. The paste may not exist.") + + matches = re.search("<h1>([^<]+)</h1>", contents) + + if matches is None: + self.state = "invalid" + raise ResolverError("The provided URL is not a valid paste.") + + paste_title = unescape(matches.group(1)) + + resolved = { + 'url' : "http://pastebin.com/download.php?i=%s" % paste_id, + 'priority' : 1, + 'format' : "text" + } + + self.results = { + 'title': paste_title, + 'files': [resolved] + } + + self.state = "finished" + return self diff --git a/resolv/resolvers/putlocker.py b/resolv/resolvers/putlocker.py index b603e3a..dc75ce7 100644 --- a/resolv/resolvers/putlocker.py +++ b/resolv/resolvers/putlocker.py @@ -1,62 +1,78 @@ import re -from resolv.shared import ResolverError, unescape +from resolv.shared import ResolverError, unescape, Task -def resolve(url): - try: - import mechanize - except ImportError: - raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.") +class PutlockerTask(Task): + result_type = "video" - matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", url) + def run(self): + try: + import mechanize + except ImportError: + self.state = "failed" + raise ResolverError("The Python mechanize module is required to resolve PutLocker URLs.") + + matches = re.search("https?:\/\/(www\.)?putlocker\.com\/(file|embed)\/([A-Z0-9]+)", self.url) - if matches is None: - raise ResolverError("The provided URL is not a valid PutLocker URL.") - - video_id = matches.group(3) - - try: - browser = mechanize.Browser() - browser.set_handle_robots(False) - browser.open("http://putlocker.com/embed/%s" % video_id) - except: - raise ResolverError("The PutLocker site could not be reached.") - - try: - browser.select_form(nr=0) - result = browser.submit() - page = result.read() - except Exception, e: - raise ResolverError("The file was removed, or the URL is incorrect.") - - matches = re.search("playlist: '([^']+)'", page) - - if matches is None: - raise ResolverError("No playlist was found on the given URL; the PutLocker server for this file may be in maintenance mode, or the given URL may not be a video file. The PutLocker resolver currently only supports video links.") - - playlist = matches.group(1) - - try: - browser.open("http://www.putlocker.com%s" % playlist) - except: - raise ResolverError("The playlist file for the given URL could not be loaded.") - - matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) - - if matches is None: - raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.") - - video_file = matches.group(1) - - try: - video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) - except: - raise ResolverError("Could not find the video title.") - - stream_dict = { - 'url' : video_file, - 'quality' : "unknown", - 'priority' : 1, - 'format' : "unknown" - } - - return { 'title': video_title, 'videos': [stream_dict] } + if matches is None: + self.state = "invalid" + raise ResolverError("The provided URL is not a valid PutLocker URL.") + + video_id = matches.group(3) + + try: + browser = mechanize.Browser() + browser.set_handle_robots(False) + browser.open("http://putlocker.com/embed/%s" % video_id) + except: + self.state = "failed" + raise ResolverError("The PutLocker site could not be reached.") + + try: + browser.select_form(nr=0) + result = browser.submit() + page = result.read() + except Exception, e: + self.state = "nonexistent" + raise ResolverError("The file was removed, or the URL is incorrect.") + + matches = re.search("playlist: '([^']+)'", page) + + if matches is None: + raise ResolverError("No playlist was found on the given URL; the PutLocker server for this file may be in maintenance mode, or the given URL may not be a video file. The PutLocker resolver currently only supports video links.") + + playlist = matches.group(1) + + try: + browser.open("http://www.putlocker.com%s" % playlist) + except: + self.state = "failed" + raise ResolverError("The playlist file for the given URL could not be loaded.") + + matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) + + if matches is None: + self.state = "failed" + raise ResolverError("The playlist file does not contain any video URLs. The PutLocker resolver currently only supports video links.") + + video_file = matches.group(1) + + try: + video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) + except: + self.state = "failed" + raise ResolverError("Could not find the video title.") + + stream_dict = { + 'url' : video_file, + 'quality' : "unknown", + 'priority' : 1, + 'format' : "unknown" + } + + self.results = { + 'title': video_title, + 'videos': [stream_dict] + } + + self.state = "finished" + return self diff --git a/resolv/resolvers/sockshare.py b/resolv/resolvers/sockshare.py index 079b110..a13fca6 100644 --- a/resolv/resolvers/sockshare.py +++ b/resolv/resolvers/sockshare.py @@ -1,62 +1,78 @@ import re -from resolv.shared import ResolverError, unescape +from resolv.shared import ResolverError, unescape, Task -def resolve(url): - try: - import mechanize - except ImportError: - raise ResolverError("The Python mechanize module is required to resolve SockShare URLs.") +class SockshareTask(Task): + result_type = "video" - matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", url) + def run(self): + try: + import mechanize + except ImportError: + self.state = "failed" + raise ResolverError("The Python mechanize module is required to resolve Sockshare URLs.") + + matches = re.search("https?:\/\/(www\.)?sockshare\.com\/(file|embed)\/([A-Z0-9]+)", self.url) - if matches is None: - raise ResolverError("The provided URL is not a valid SockShare URL.") - - video_id = matches.group(3) - - try: - browser = mechanize.Browser() - browser.set_handle_robots(False) - browser.open("http://sockshare.com/embed/%s" % video_id) - except: - raise ResolverError("The SockShare site could not be reached.") - - try: - browser.select_form(nr=0) - result = browser.submit() - page = result.read() - except Exception, e: - raise ResolverError("The file was removed, or the URL is incorrect.") - - matches = re.search("playlist: '([^']+)'", page) - - if matches is None: - raise ResolverError("No playlist was found on the given URL; the SockShare server for this file may be in maintenance mode, or the given URL may not be a video file. The SockShare resolver currently only supports video links.") - - playlist = matches.group(1) - - try: - browser.open("http://www.sockshare.com%s" % playlist) - except: - raise ResolverError("The playlist file for the given URL could not be loaded.") - - matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) - - if matches is None: - raise ResolverError("The playlist file does not contain any video URLs. The SockShare resolver currently only supports video links.") - - video_file = matches.group(1) - - try: - video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) - except: - raise ResolverError("Could not find the video title.") - - stream_dict = { - 'url' : video_file, - 'quality' : "unknown", - 'priority' : 1, - 'format' : "unknown" - } - - return { 'title': video_title, 'videos': [stream_dict] } + if matches is None: + self.state = "invalid" + raise ResolverError("The provided URL is not a valid Sockshare URL.") + + video_id = matches.group(3) + + try: + browser = mechanize.Browser() + browser.set_handle_robots(False) + browser.open("http://sockshare.com/embed/%s" % video_id) + except: + self.state = "failed" + raise ResolverError("The Sockshare site could not be reached.") + + try: + browser.select_form(nr=0) + result = browser.submit() + page = result.read() + except Exception, e: + self.state = "nonexistent" + raise ResolverError("The file was removed, or the URL is incorrect.") + + matches = re.search("playlist: '([^']+)'", page) + + if matches is None: + raise ResolverError("No playlist was found on the given URL; the Sockshare server for this file may be in maintenance mode, or the given URL may not be a video file. The Sockshare resolver currently only supports video links.") + + playlist = matches.group(1) + + try: + browser.open("http://www.sockshare.com%s" % playlist) + except: + self.state = "failed" + raise ResolverError("The playlist file for the given URL could not be loaded.") + + matches = re.search("url=\"([^\"]+)\" type=\"video\/x-flv\"", browser.response().read()) + + if matches is None: + self.state = "failed" + raise ResolverError("The playlist file does not contain any video URLs. The Sockshare resolver currently only supports video links.") + + video_file = matches.group(1) + + try: + video_title = unescape(re.search('<a href="\/file\/[^"]+"[^>]*><strong>([^<]*)<\/strong><\/a>', page).group(1)) + except: + self.state = "failed" + raise ResolverError("Could not find the video title.") + + stream_dict = { + 'url' : video_file, + 'quality' : "unknown", + 'priority' : 1, + 'format' : "unknown" + } + + self.results = { + 'title': video_title, + 'videos': [stream_dict] + } + + self.state = "finished" + return self diff --git a/resolv/resolvers/youtube.py b/resolv/resolvers/youtube.py index 6ffd2e1..e76aea7 100644 --- a/resolv/resolvers/youtube.py +++ b/resolv/resolvers/youtube.py @@ -1,88 +1,117 @@ -import re, urllib, urllib2 -from resolv.shared import ResolverError, unescape +import re, urllib, urllib2, urlparse +from resolv.shared import ResolverError, unescape, Task -def resolve(url): - try: - contents = urllib2.urlopen(url).read() - except: - raise ResolverError("Could not retrieve the specified URL.") +class YoutubeTask(Task): + result_type = "video" - map_start = "url_encoded_fmt_stream_map=" - map_end = "\\u0026amp;" + extra_headers = { + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-us,en;q=0.5' + } - try: - pos_start = contents.index(map_start) + len(map_start) + 6 - snippet = contents[pos_start:] - except ValueError: - raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?") - - try: - pos_end = snippet.index(map_end) - stream_map = snippet[:pos_end] - except ValueError: - raise ResolverError("The ending position for the YouTube player configuration could not be found.") - - try: - stream_map = urllib.unquote(stream_map) - streams = stream_map.split(',url=') - except: - raise ResolverError("The YouTube player configuration is corrupted.") - - stream_pool = [] - - for stream in streams: - fields = stream.split('&') + def run(self): + try: + contents = self.fetch_page(self.url) + except urllib2.URLError, e: + self.state = "failed" + raise ResolverError("Could not retrieve the specified URL.") + + map_start = "url_encoded_fmt_stream_map=" + map_end = "\\u0026amp;" - if len(fields) < 5: - raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.") + try: + pos_start = contents.index(map_start) + len(map_start) + snippet = contents[pos_start:] + except ValueError: + self.state = "failed" + raise ResolverError("The starting position for the YouTube player configuration could not be found. Is the URL really a valid video page?") - video_url = urllib.unquote(fields[0]) - quality = fields[1].split("=")[1] - fallback_host = fields[2].split("=")[1] - mimetype = urllib.unquote(fields[3].split("=")[1]) - itag = fields[4].split("=", 2)[1] + try: + pos_end = snippet.index(map_end) + stream_map = snippet[:pos_end] + except ValueError: + self.state = "failed" + raise ResolverError("The ending position for the YouTube player configuration could not be found.") - if mimetype.startswith("video/mp4"): - video_format = "mp4" - elif mimetype.startswith("video/x-flv"): - video_format = "flv" - elif mimetype.startswith("video/3gpp"): - video_format = "3gp" - elif mimetype.startswith("video/webm"): - video_format = "webm" - else: - video_format = "unknown" + try: + stream_map = urllib.unquote(stream_map) + streams = stream_map.split(',') + except: + self.state = "failed" + raise ResolverError("The YouTube player configuration is corrupted.") - if quality == "small": - video_quality = "240p" - video_priority = 5 - elif quality == "medium": - video_quality = "360p" - video_priority = 4 - elif quality == "large": - video_quality = "480p" - video_priority = 3 - elif quality == "hd720": - video_quality = "720p" - video_priority = 2 - elif quality == "hd1080": - video_quality = "1080p" - video_priority = 1 - else: - video_quality = "unknown" + stream_pool = [] - stream_dict = { - 'url' : video_url, - 'quality' : video_quality, - 'priority' : video_priority, - 'format' : video_format + for stream in streams: + fields = urlparse.parse_qs(stream) + + if len(fields) < 6: + self.state = "failed" + raise ResolverError("The amount of fields in the YouTube player configuration is incorrect.") + + signature = fields['sig'][0] + video_url = "%s&signature=%s" % (fields['url'][0], signature) + quality = fields['quality'][0] + fallback_host = fields['fallback_host'][0] + mimetype = fields['type'][0] + itag = fields['itag'][0] + + if mimetype.startswith("video/mp4"): + video_format = "mp4" + elif mimetype.startswith("video/x-flv"): + video_format = "flv" + elif mimetype.startswith("video/3gpp"): + video_format = "3gp" + elif mimetype.startswith("video/webm"): + video_format = "webm" + else: + video_format = "unknown" + + if quality == "small": + video_quality = "240p" + video_priority = 5 + elif quality == "medium": + video_quality = "360p" + video_priority = 4 + elif quality == "large": + video_quality = "480p" + video_priority = 3 + elif quality == "hd720": + video_quality = "720p" + video_priority = 2 + elif quality == "hd1080": + video_quality = "1080p" + video_priority = 1 + else: + video_quality = "unknown" + video_priority = 0 + print "UNKNOWN: %s" % quality + + stream_dict = { + 'url' : video_url, + 'quality' : video_quality, + 'priority' : video_priority, + 'format' : video_format, + 'extra' : { + 'itag': itag, + 'mimetype': mimetype, + 'fallback_host': fallback_host + } + } + + stream_pool.append(stream_dict) + + try: + video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1)) + except: + self.state = "failed" + raise ResolverError("Could not find the video title.") + + self.results = { + 'title': video_title, + 'videos': stream_pool } - stream_pool.append(stream_dict) - - try: - video_title = unescape(re.search('<meta property="og:title" content="([^"]*)">', contents).group(1)) - except: - raise ResolverError("Could not find the video title.") - - return { 'title': video_title, 'videos': stream_pool } + self.state = "finished" + return self diff --git a/resolv/shared.py b/resolv/shared.py index a668870..5598bb4 100644 --- a/resolv/shared.py +++ b/resolv/shared.py @@ -1,4 +1,5 @@ from HTMLParser import HTMLParser +import cookielib, urllib2 import sys reload(sys) @@ -11,5 +12,45 @@ class ResolverError(Exception): def __str__(self): return repr(self.val) +class Task(): + captcha = None + cookiejar = None + useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11" + opener = None + results = None + state = "none" + url = "" + result_type = "none" + extra_headers = {} + + def __init__(self, url): + self.cookiejar = cookielib.CookieJar() + + self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar)) + self.opener.addheaders = [] + + self.extra_headers['User-agent'] = self.useragent + + for header, payload in self.extra_headers.iteritems(): + self.opener.addheaders.append((header, payload)) + + self.url = url + + def run(self): + self.state = "finished" + self.results = self.url + return self + + def fetch_page(self, url): + return self.opener.open(url).read() + +class Captcha(): + image = "" + audio = "" + + def __init__(image="", audio=""): + self.image = image + self.audio = audio + def unescape(s): return HTMLParser.unescape.__func__(HTMLParser, s)