You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

54 lines
1.4 KiB
Python

import requests, re, json, sys
import lxml.html
from lxml import etree
from lxml.etree import XMLParser
# Dependencies: requests, lxml
# Usage: python flickrgrab.py http://url.to.flickr/profile/page
# Saves to working directory
# http://stackoverflow.com/questions/16694907/how-to-download-large-file-in-python-with-requests-py
def DownloadFile(url, chunk_size=1024):
    """Stream the resource at *url* into a file in the working directory.

    The local file name is the last path segment of *url*, with any
    ``?query`` or ``#fragment`` suffix removed so the name is usable on disk.

    Args:
        url: Address of the file to download.
        chunk_size: Number of bytes requested from the response per read.

    Returns:
        The name of the file that was written.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error page is never saved as if it were the file.
    """
    # Strip the fragment first (it may itself contain '?'), then the query,
    # and only then take the last path segment.
    local_filename = url.split('#', 1)[0].split('?', 1)[0].split('/')[-1]
    # stream=True keeps the body out of memory; it is read chunk by chunk below.
    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # A streamed response keeps its connection checked out until the body
        # is fully read or the response is closed, so always close it.
        r.close()
    return local_filename
# Script body: read the page count from the profile page given on the command
# line, then walk every page of the JSON listing and download each picture's
# 'o' size. Written in the subset of syntax that runs on both Python 2 and 3
# (single-argument print(...), range, "except X as e").
if len(sys.argv) < 2:
    print("Usage: python %s http://url.to.flickr/profile/page" % sys.argv[0])
    sys.exit(1)
profile_url = sys.argv[1]

pagedata = requests.get(profile_url).text
# NOTE(review): this parser is not referenced again in the visible script; it
# is kept so the module-level name stays available.
parser = XMLParser(ns_clean=True, recover=True)
doc = lxml.html.fromstring(pagedata)
elem = doc.xpath("//div[@class='Pages']/@data-page-count")
if not elem:
    print("No page count found")
    sys.exit(1)
page_count = int(elem[0])

print("Retrieving %d pages worth of images..." % page_count)
total_done = 0
for i in range(1, page_count + 1):
    pictures = requests.get("%s?data=1&page=%d&append=1" % (profile_url, i)).json()
    for picture in pictures:
        # Only the key lookups can raise KeyError, so only they sit in this try.
        try:
            # 'file' is not used below, but a picture without it is skipped,
            # exactly as the message says.
            filename = picture['sizes']['o']['file']
            url = picture['sizes']['o']['url']
        except KeyError:
            print("Skipped image because of missing URL or filename")
            continue
        # One failed download should not abort the remaining pages.
        try:
            DownloadFile(url)
        except requests.exceptions.RequestException as e:
            print("Skipped image because the download failed: %s" % e)
            continue
        total_done += 1
        print("Downloaded %d full-size images..." % total_done)
    print("Flipping to page %d..." % i)