#!/usr/bin/python
"""Download every image linked from a single 4chan /b/ thread.

The thread page is fetched, scanned for links to full-size images, and each
distinct image is saved into the current working directory under the last
path component of its URL.  Progress is reported on standard output.

Usage: run with no arguments to use the built-in thread URL, or pass a
thread URL as the first command-line argument.
"""

import re
import sys
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve
    from urllib2 import urlopen

# Matches an href pointing at a protocol-relative image URL on /b/; group 1
# is the URL itself (still without a scheme).  Raw string so the backslash
# escapes reach the regex engine untouched.
IMAGE_LINK_RE = re.compile(
    r'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'
)

# Thread scraped when no URL is supplied on the command line.
THREAD_URL = 'http://boards.4chan.org/b/res/400860795'


def fetch_page(url):
    """Return the body of *url* as text.

    The response is always closed, even if reading fails.  Bytes that are
    not valid UTF-8 are replaced rather than raising, since only the ASCII
    image links matter to the caller.
    """
    with closing(urlopen(url)) as response:
        return response.read().decode('utf-8', 'replace')


def find_image_urls(page):
    """Return the distinct image URLs found in *page*, in first-seen order.

    Args:
        page: HTML text of a thread page.

    Returns:
        A list of protocol-relative URLs (each starting with ``//``) with
        duplicates removed.  Empty if the page contains no image links.
    """
    seen = set()
    urls = []
    for match in IMAGE_LINK_RE.finditer(page):
        link = match.group(1)
        # A set gives O(1) membership tests; the list keeps page order.
        if link not in seen:
            seen.add(link)
            urls.append(link)
    return urls


def download_images(urls):
    """Download each protocol-relative URL in *urls* to the working directory.

    Each file is named after the last path component of its URL, and one
    progress line is printed per completed download.  Any error raised by
    the download propagates to the caller and stops the remaining downloads.
    """
    total = len(urls)
    for current, link in enumerate(urls, 1):
        downloadurl = "http:%s" % link  # the links on the page carry no scheme
        filename = downloadurl.split('/')[-1]
        urlretrieve(downloadurl, filename)
        print("Downloaded %s (%d/%d)." % (filename, current, total))


def main(url=THREAD_URL):
    """Fetch the thread at *url* and download every image it links to."""
    urls = find_image_urls(fetch_page(url))
    print("Parsed thread. Total images: %d" % len(urls))
    download_images(urls)


if __name__ == '__main__':
    # An optional first argument overrides the default thread URL.
    main(*sys.argv[1:2])