# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

# 30 lines
# 655 B

import re, urllib, urllib2
# Pattern for protocol-relative image links in the thread page; group 1
# captures "//images.4chan.org/b/src/<digits>.<ext>". Written as a raw string
# so the regex escapes are not treated as Python string escapes.
regex = r'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'

# Address of the thread page to scrape; fill this in before running.
url = ''


def extract_image_urls(page, pattern=regex):
    """Return the distinct image URLs found in *page*, in first-seen order.

    page: HTML text of the thread page.
    pattern: regular expression whose first group captures an image URL.
    """
    seen = set()
    found = []
    for match in re.finditer(pattern, page):
        link = match.group(1)
        # Skip links that were already collected so each image is listed once.
        if link not in seen:
            seen.add(link)
            found.append(link)
    return found


def download_images(urls):
    """Download every entry of *urls* into the current working directory.

    Each entry is protocol-relative ("//host/path"), so "http:" is prepended
    before fetching. The file is saved under the last path component of the
    URL, and a progress line is printed after each download.
    """
    total = len(urls)
    for current, downloadurl in enumerate(urls, 1):
        downloadurl = "http:%s" % downloadurl
        filename = downloadurl.split('/')[-1]
        urllib.urlretrieve(downloadurl, filename)
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement as under the print() function.
        print("Downloaded %s (%d/%d)." % (filename, current, total))


def main():
    """Fetch the page at `url`, report how many images it links, download them."""
    if not url:
        # An empty address would otherwise fail inside urlopen with an
        # unhelpful "unknown url type" error.
        raise SystemExit("Set `url` to the thread address before running.")
    page = urllib2.urlopen(url).read()
    urls = extract_image_urls(page)
    print("Parsed thread. Total images: %d" % len(urls))
    download_images(urls)


if __name__ == "__main__":
    main()