Accept multiple threads as input arguments

master
Sven Slootweg 13 years ago
parent 7d578cdea3
commit df175d7715

74
4c

@ -7,7 +7,7 @@ import re, urllib, urllib2, argparse, os
# Command-line interface. Accepts one or more thread URLs (nargs='+') plus
# flags controlling where images are written.
parser = argparse.ArgumentParser(description='Downloads all full-size images in an arbitrary 4chan thread.')
# Fix: nargs='+' accepts multiple threads, so the help text must not claim
# a single URL.
parser.add_argument('urllist', metavar='url', type=str, nargs='+',
    help='one or more thread URLs to download')
parser.add_argument('-n', '--newdir', dest='newdir', action='store_true',
    help='create a new directory for this thread in the current directory')
@ -18,48 +18,50 @@ args = parser.parse_args()
options = vars(args) options = vars(args)
regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"' regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'
url = options['urllist'][0]
try: for url in options['urllist']:
page = urllib2.urlopen(url).read() print "Thread URL: %s" % url
except ValueError:
print "That does not look like a valid URL."
exit(1)
except urllib2.HTTPError:
print "The given URL returns a HTTP 404 status code - the thread may have died."
exit(1)
if options['newdir'] == True: try:
thread_id = url.split('/')[-1] page = urllib2.urlopen(url).read()
target_dir = "%s/" % thread_id except ValueError:
print "That does not look like a valid URL."
exit(1)
except urllib2.HTTPError:
print "The given URL returns a HTTP 404 status code - the thread may have died."
exit(1)
if not os.path.exists(thread_id): if options['newdir'] == True:
os.makedirs(thread_id) thread_id = url.split('/')[-1]
else: target_dir = "%s/" % thread_id
target_dir = ""
search = re.compile(regex) if not os.path.exists(thread_id):
matches = search.finditer(page) os.makedirs(thread_id)
else:
target_dir = ""
urls = [] search = re.compile(regex)
matches = search.finditer(page)
for match in matches: urls = []
if match.group(1) not in urls:
urls.append(match.group(1))
current = 1 for match in matches:
total = len(urls) if match.group(1) not in urls:
urls.append(match.group(1))
print "Parsed thread. Total images: %d" % total current = 1
total = len(urls)
for downloadurl in urls: print "Parsed thread. Total images: %d" % total
downloadurl = "http:%s" % downloadurl
filename = downloadurl.split('/')[-1]
path = target_dir + filename
if os.path.exists(path) and options['force_redownload'] == False: for downloadurl in urls:
print "Skipped existing file %s (%d/%d)." % (filename, current, total) downloadurl = "http:%s" % downloadurl
else: filename = downloadurl.split('/')[-1]
urllib.urlretrieve(downloadurl, path) path = target_dir + filename
print "Downloaded %s (%d/%d)." % (filename, current, total)
current += 1 if os.path.exists(path) and options['force_redownload'] == False:
print "Skipped existing file %s (%d/%d)." % (filename, current, total)
else:
urllib.urlretrieve(downloadurl, path)
print "Downloaded %s (%d/%d)." % (filename, current, total)
current += 1

Loading…
Cancel
Save