You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
655 B
Python
30 lines
655 B
Python
#!/usr/bin/python
|
|
|
|
import re, urllib, urllib2
|
|
|
|
# Pattern for image links in the thread's HTML. Capture group 1 is the
# protocol-relative link itself ("//images.4chan.org/b/src/<digits>.<ext>").
# Raw string so the backslashes reach the regex engine untouched.
regex = r'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'

# Thread whose images are downloaded when the script is run.
url = 'http://boards.4chan.org/b/res/400860795'

# Compiled once at import time; reused by extract_image_links().
search = re.compile(regex)


def extract_image_links(page):
    """Return the image links found in *page*, without duplicates.

    Links are returned in the order of their first appearance in *page*.
    Each entry is the text captured by group 1 of ``search`` (it starts
    with ``//``, i.e. it carries no URL scheme).
    """
    seen = set()  # O(1) membership test; the list alone keeps the order
    links = []
    for match in search.finditer(page):
        link = match.group(1)
        if link not in seen:
            seen.add(link)
            links.append(link)
    return links


def split_download_target(link):
    """Return ``(downloadurl, filename)`` for a scheme-less *link*.

    ``downloadurl`` is *link* prefixed with ``http:``; ``filename`` is the
    last ``/``-separated component of that URL.
    """
    downloadurl = "http:%s" % link
    filename = downloadurl.split('/')[-1]
    return downloadurl, filename


def main():
    """Fetch the thread at ``url`` and download every image it links to.

    Files are saved under their remote file name, relative to the current
    working directory (the name is passed as-is to ``urllib.urlretrieve``).
    Progress is reported on standard output.
    """
    page = urllib2.urlopen(url).read()

    urls = extract_image_links(page)
    total = len(urls)

    # Single parenthesised argument: prints the same text whether ``print``
    # is the Python 2 statement or a function.
    print("Parsed thread. Total images: %d" % total)

    # Counter starts at 1 so the progress message reads "(1/N)" .. "(N/N)".
    for current, link in enumerate(urls, 1):
        downloadurl, filename = split_download_target(link)
        urllib.urlretrieve(downloadurl, filename)
        print("Downloaded %s (%d/%d)." % (filename, current, total))


if __name__ == "__main__":
    main()