Actual file downloading and regex fix

2012-05-21 00:03:18 +02:00 · 2012-05-21 00:03:18 +02:00 · 8d406c2582
parent c738ec4a6c
commit 8d406c2582
1 changed file with 13 additions and 3 deletions
--- a/16
+++ b/16
@@ -1,8 +1,8 @@
 #!/usr/bin/python

-import re, urllib2
+import re, urllib, urllib2

-regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.jpg)"'
+regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.[a-z]+)"'
 url = 'http://boards.4chan.org/b/res/400860795'

 page = urllib2.urlopen(url).read()
@@ -16,4 +16,14 @@ for match in matches:
 	if match.group(1) not in urls:
 		urls.append(match.group(1))

-print urls
+current = 1
+total = len(urls)
+
+print "Parsed thread. Total images: %d" % total
+
+for downloadurl in urls:
+	downloadurl = "http:%s" % downloadurl
+	filename = downloadurl.split('/')[-1]
+	urllib.urlretrieve(downloadurl, filename)
+	print "Downloaded %s (%d/%d)." % (filename, current, total)
+	current += 1