Actual file downloading and regex fix

master
Sven Slootweg 13 years ago
parent c738ec4a6c
commit 8d406c2582

16
4c

@@ -1,8 +1,8 @@
#!/usr/bin/python #!/usr/bin/python
import re, urllib2 import re, urllib, urllib2
regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.jpg)"' regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'
url = 'http://boards.4chan.org/b/res/400860795' url = 'http://boards.4chan.org/b/res/400860795'
page = urllib2.urlopen(url).read() page = urllib2.urlopen(url).read()
@@ -16,4 +16,14 @@ for match in matches:
if match.group(1) not in urls: if match.group(1) not in urls:
urls.append(match.group(1)) urls.append(match.group(1))
print urls current = 1
total = len(urls)
print "Parsed thread. Total images: %d" % total
for downloadurl in urls:
downloadurl = "http:%s" % downloadurl
filename = downloadurl.split('/')[-1]
urllib.urlretrieve(downloadurl, filename)
print "Downloaded %s (%d/%d)." % (filename, current, total)
current += 1

Loading…
Cancel
Save