Actual file downloading and regex fix

13 years ago · 8d406c2582
parent c738ec4a6c
commit 8d406c2582
1 changed file with 13 additions and 3 deletions
--- a/16
+++ b/16
@@ -1,8 +1,8 @@
 #!/usr/bin/python
-import re, urllib2
+import re, urllib, urllib2
-regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.jpg)"'
+regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'
 url = 'http://boards.4chan.org/b/res/400860795'
 page = urllib2.urlopen(url).read()
@@ -16,4 +16,14 @@ for match in matches:
 	if match.group(1) not in urls:
 		urls.append(match.group(1))
-print urls
+current = 1
+total = len(urls)
+print "Parsed thread. Total images: %d" % total
+for downloadurl in urls:
+	downloadurl = "http:%s" % downloadurl
+	filename = downloadurl.split('/')[-1]
+	urllib.urlretrieve(downloadurl, filename)
+	print "Downloaded %s (%d/%d)." % (filename, current, total)
+	current += 1