|
|
@ -1,11 +1,34 @@
|
|
|
|
#!/usr/bin/python
|
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
|
|
|
|
|
|
import re, urllib, urllib2
|
|
|
|
import re, urllib, urllib2, argparse, os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Command-line interface: one positional thread URL plus an optional flag
# asking for the images to be saved into a per-thread directory.
parser = argparse.ArgumentParser(
    description='Downloads all full-size images in an arbitrary 4chan thread.'
)
parser.add_argument(
    'urllist',
    metavar='url',
    type=str,
    nargs=1,  # stored as a one-element list, hence the name "urllist"
    help='the URL of the thread',
)
parser.add_argument(
    '-n', '--newdir',
    dest='newdir',
    action='store_true',
    help='create a new directory for this thread in the current directory',
)

# Parse sys.argv once and expose the result both as a namespace (args)
# and as a plain dict (options).
args = parser.parse_args()
options = vars(args)
|
|
|
|
|
|
|
|
|
|
|
|
# Pattern for the protocol-relative links to full-size images on /b/.
# Written as a raw string so the backslashes reach the regex engine as-is;
# the pattern text itself is unchanged.
regex = r'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'

# The positional argument is declared with nargs=1, so it arrives as a
# one-element list.
url = options['urllist'][0]

# Fetch the thread HTML. Exit with status 1 and a short message instead of
# a traceback when the URL is malformed or the request fails.
try:
    page = urllib2.urlopen(url).read()
except ValueError:
    print("That does not look like a valid URL.")
    # SystemExit is raised directly: the exit() helper comes from the site
    # module and is not guaranteed to be available in every interpreter.
    raise SystemExit(1)
except urllib2.URLError as err:
    # Covers unreachable hosts as well as HTTP errors (HTTPError is a
    # subclass of URLError).
    print("Could not fetch %s: %s" % (url, err))
    raise SystemExit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Decide where downloaded files go. target_dir is a string prefix that is
# prepended to each filename: "<thread id>/" with --newdir, "" otherwise.
if options['newdir']:
    # The thread id is the last path component of the URL. Strip any
    # "#anchor" / "?query" part and a trailing slash first, otherwise the
    # name could come out empty (making the prefix "/") or include the
    # post anchor.
    thread_id = url.split('#')[0].split('?')[0].rstrip('/').split('/')[-1]
    if not thread_id:
        print("Could not work out a thread id from that URL.")
        raise SystemExit(1)
    target_dir = "%s/" % thread_id
    # isdir rather than exists: a plain file with the same name must not be
    # mistaken for the directory (makedirs will then raise OSError).
    if not os.path.isdir(thread_id):
        os.makedirs(thread_id)
else:
    # Empty prefix: files are written relative to the current directory.
    target_dir = ""

# Lazily iterate over every image link found in the already-fetched page.
search = re.compile(regex)
matches = search.finditer(page)
|
|
|
@ -24,6 +47,7 @@ print "Parsed thread. Total images: %d" % total
|
|
|
|
# Download every collected image, reporting progress as "current/total".
# urls, current and total are set up earlier in the script.
for downloadurl in urls:
    # The collected links have no scheme (presumably the protocol-relative
    # "//host/path" form matched by the regex), so prepend one.
    downloadurl = "http:%s" % downloadurl
    filename = downloadurl.split('/')[-1]
    # target_dir is either "" or "<thread id>/", so plain concatenation
    # yields the destination path.
    path = target_dir + filename
    try:
        urllib.urlretrieve(downloadurl, path)
    except IOError as err:
        # One failed transfer should not abort the remaining downloads.
        print("Failed to download %s (%d/%d): %s" % (filename, current, total, err))
    else:
        print("Downloaded %s (%d/%d)." % (filename, current, total))
    current += 1
|
|
|
|