From df175d7715058b7b2d36eb82aa1787fa0a315c81 Mon Sep 17 00:00:00 2001
From: Sven Slootweg
Date: Mon, 21 May 2012 03:47:02 +0200
Subject: [PATCH] Accept multiple threads as input arguments

---
 4c | 76 ++++++++++++++++++++++++++++++++------------------------------
 1 file changed, 39 insertions(+), 37 deletions(-)

diff --git a/4c b/4c
index 5b09b45..1b9f674 100755
--- a/4c
+++ b/4c
@@ -7,7 +7,7 @@ import re, urllib, urllib2, argparse, os
 
 parser = argparse.ArgumentParser(description='Downloads all full-size images in an arbitrary 4chan thread.')
-parser.add_argument('urllist', metavar='url', type=str, nargs=1,
+parser.add_argument('urllist', metavar='url', type=str, nargs='+',
 	help='the URL of the thread')
 parser.add_argument('-n', '--newdir', dest='newdir', action='store_true',
 	help='create a new directory for this thread in the current directory')
 
@@ -18,48 +18,50 @@ args = parser.parse_args()
 options = vars(args)
 
 regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'
-url = options['urllist'][0]
 
-try:
-	page = urllib2.urlopen(url).read()
-except ValueError:
-	print "That does not look like a valid URL."
-	exit(1)
-except urllib2.HTTPError:
-	print "The given URL returns a HTTP 404 status code - the thread may have died."
-	exit(1)
+for url in options['urllist']:
+	print "Thread URL: %s" % url
+	
+	try:
+		page = urllib2.urlopen(url).read()
+	except ValueError:
+		print "That does not look like a valid URL."
+		exit(1)
+	except urllib2.HTTPError:
+		print "The given URL returns a HTTP 404 status code - the thread may have died."
+		exit(1)
 
-if options['newdir'] == True:
-	thread_id = url.split('/')[-1]
-	target_dir = "%s/" % thread_id
+	if options['newdir'] == True:
+		thread_id = url.split('/')[-1]
+		target_dir = "%s/" % thread_id
 
-	if not os.path.exists(thread_id):
-		os.makedirs(thread_id)
-else:
-	target_dir = ""
+		if not os.path.exists(thread_id):
+			os.makedirs(thread_id)
+	else:
+		target_dir = ""
 
-search = re.compile(regex)
-matches = search.finditer(page)
+	search = re.compile(regex)
+	matches = search.finditer(page)
 
-urls = []
+	urls = []
 
-for match in matches:
-	if match.group(1) not in urls:
-		urls.append(match.group(1))
+	for match in matches:
+		if match.group(1) not in urls:
+			urls.append(match.group(1))
 
-current = 1
-total = len(urls)
+	current = 1
+	total = len(urls)
 
-print "Parsed thread. Total images: %d" % total
+	print "Parsed thread. Total images: %d" % total
 
-for downloadurl in urls:
-	downloadurl = "http:%s" % downloadurl
-	filename = downloadurl.split('/')[-1]
-	path = target_dir + filename
-	
-	if os.path.exists(path) and options['force_redownload'] == False:
-		print "Skipped existing file %s (%d/%d)." % (filename, current, total)
-	else:
-		urllib.urlretrieve(downloadurl, path)
-		print "Downloaded %s (%d/%d)." % (filename, current, total)
-	current += 1
+	for downloadurl in urls:
+		downloadurl = "http:%s" % downloadurl
+		filename = downloadurl.split('/')[-1]
+		path = target_dir + filename
+		
+		if os.path.exists(path) and options['force_redownload'] == False:
+			print "Skipped existing file %s (%d/%d)." % (filename, current, total)
+		else:
+			urllib.urlretrieve(downloadurl, path)
+			print "Downloaded %s (%d/%d)." % (filename, current, total)
+		current += 1