Actual file downloading and regex fix
This commit is contained in:
parent
c738ec4a6c
commit
8d406c2582
16
4c
Normal file → Executable file
16
4c
Normal file → Executable file
|
@ -1,8 +1,8 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import re, urllib2
|
||||
import re, urllib, urllib2
|
||||
|
||||
regex = 'href="(\/\/images\.4chan\.org\/b\/src\/[0-9]+\.jpg)"'
|
||||
# Pattern for image links in the thread HTML. Group 1 captures the
# protocol-relative image URL ("//images.4chan.org/b/src/<digits>.<ext>").
# A raw string keeps the regex escapes out of Python's own string-escape
# processing; "/" needs no escaping in a Python regex.
regex = r'href="(//images\.4chan\.org/b/src/[0-9]+\.[a-z]+)"'

# Thread page to fetch and scan for image links.
url = 'http://boards.4chan.org/b/res/400860795'
|
||||
|
||||
# Fetch the thread HTML; close the response explicitly so the connection
# is released even if read() raises.
response = urllib2.urlopen(url)
try:
    page = response.read()
finally:
    response.close()
|
||||
|
@ -16,4 +16,14 @@ for match in matches:
|
|||
if match.group(1) not in urls:
|
||||
urls.append(match.group(1))
|
||||
|
||||
print urls
|
||||
# Download every collected image into the current working directory,
# reporting progress as "<n>/<total>".
total = len(urls)

print("Parsed thread. Total images: %d" % total)

# enumerate(..., 1) supplies the 1-based progress counter.
for current, image_path in enumerate(urls, 1):
    # Captured links are protocol-relative ("//host/path"); prefix the scheme.
    downloadurl = "http:%s" % image_path
    # Save under the last path component of the URL.
    filename = downloadurl.split('/')[-1]
    urllib.urlretrieve(downloadurl, filename)
    print("Downloaded %s (%d/%d)." % (filename, current, total))
|
||||
|
|
Loading…
Reference in a new issue