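Deal with throttling properly: when Pastebin answers with HTTP 403 or a "temporarily blocked your computer" page, log it and sleep 600 seconds before retrying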

develop
Sven Slootweg 11 years ago
parent 73fa91c953
commit 68e82f8ac8

@@ -13,12 +13,19 @@ while True:
while True: # We want to keep trying until it succeeds...
try:
paste = requests.get("http://pastebin.com/raw.php?i=%s" % item["id"]).text
response = requests.get("http://pastebin.com/raw.php?i=%s" % item["id"])
paste = response.text
except Exception, e:
# TODO: Log error
print e
time.sleep(5)
continue # Retry
if response.status_code == 403:
logger.send(msgpack.packb({"component": "retrieve", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."}))
time.sleep(600)
continue # Retry
break # Done
item["retrieval_time"] = int(time.time())

@@ -16,6 +16,11 @@ while True:
time.sleep(30)
continue
if "temporarily blocked your computer" in page:
logger.send(msgpack.packb({"component": "scrape", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."}))
time.sleep(600)
continue
basetime = int(time.time())
xml = lxml.html.fromstring(page)

Loading…
Cancel
Save