Deal with throttling properly

This commit is contained in:
Sven Slootweg 2013-11-02 22:00:29 +01:00
parent 73fa91c953
commit 68e82f8ac8
2 changed files with 13 additions and 1 deletions

View file

@ -13,12 +13,19 @@ while True:
# Fetch the raw paste for `item`, retrying until a usable response arrives.
# Leaves the HTTP response in `response` and its decoded body in `paste`.
while True:  # We want to keep trying until it succeeds...
    try:
        # One request per attempt. The timeout keeps a stalled connection
        # from blocking this loop forever: it raises into the handler below
        # and is retried like any other failure.
        response = requests.get("http://pastebin.com/raw.php?i=%s" % item["id"], timeout=30)
        paste = response.text
    except Exception as e:
        # Deliberately broad: any failure here is retried after a short pause.
        # TODO: Log error
        print(e)
        time.sleep(5)
        continue  # Retry

    if response.status_code == 403:
        # A 403 is treated as throttling: report it and back off for 10 minutes.
        logger.send(msgpack.packb({"component": "retrieve", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."}))
        time.sleep(600)
        continue  # Retry

    break  # Done
item["retrieval_time"] = int(time.time())

View file

@ -16,6 +16,11 @@ while True:
time.sleep(30)
continue
if "temporarily blocked your computer" in page:
logger.send(msgpack.packb({"component": "scrape", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."}))
time.sleep(600)
continue
basetime = int(time.time())
xml = lxml.html.fromstring(page)