# flickrgrab.py — download full-size images from a Flickr profile page.
import requests, re, json, sys
import lxml.html
from lxml import etree
from lxml.etree import XMLParser

# Dependencies: requests, lxml
# Usage: python flickrgrab.py http://url.to.flickr/profile/page
# Saves to working directory

# http://stackoverflow.com/questions/16694907/how-to-download-large-file-in-python-with-requests-py
def DownloadFile(url):
    """Stream the file at ``url`` to the working directory.

    The local file name is the last path segment of the URL.
    Returns the local file name.
    Raises requests.HTTPError on a 4xx/5xx response instead of
    silently saving the error page as if it were the image.
    """
    local_filename = url.split('/')[-1]
    # stream=True so large images are written chunk by chunk
    # instead of being held in memory whole.
    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()  # don't save an HTML error page as an image
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # Release the connection back to the pool even on error.
        r.close()
    return local_filename
# --- main script: fetch the profile page, then walk every gallery page ---

# First CLI argument is the Flickr profile URL.
pagedata = requests.get(sys.argv[1]).text

# NOTE(review): this tolerant parser is built but never passed to
# fromstring below — fromstring uses lxml.html's default parser.
parser = XMLParser(ns_clean=True, recover=True)
doc = lxml.html.fromstring(pagedata)

# Flickr publishes the total page count in a data attribute.
elem = doc.xpath("//div[@class='Pages']/@data-page-count")

if len(elem) > 0:
    page_count = int(elem[0])
else:
    print("No page count found")
    sys.exit(1)

print("Retrieving %d pages worth of images..." % page_count)

total_done = 0

for i in range(1, page_count + 1):
    # data=1&append=1 asks Flickr for the JSON listing of this page's pictures.
    pictures = requests.get("%s?data=1&page=%d&append=1" % (sys.argv[1], i)).json()

    for picture in pictures:
        try:
            # 'file' is looked up only to confirm the original size exists;
            # DownloadFile names the saved file after the last URL segment.
            filename = picture['sizes']['o']['file']
            url = picture['sizes']['o']['url']
            DownloadFile(url)

            total_done += 1
            print("Downloaded %d full-size images..." % total_done)
        except KeyError:
            # Picture has no original-size entry — skip it.
            print("Skipped image because of missing URL or filename")

    print("Flipping to page %d..." % i)