Deal with throttling properly

develop
Sven Slootweg 11 years ago
parent 73fa91c953
commit 68e82f8ac8

@@ -13,12 +13,19 @@ while True:
while True: # We want to keep trying until it succeeds... while True: # We want to keep trying until it succeeds...
try: try:
paste = requests.get("http://pastebin.com/raw.php?i=%s" % item["id"]).text response = requests.get("http://pastebin.com/raw.php?i=%s" % item["id"])
paste = response.text
except Exception, e: except Exception, e:
# TODO: Log error # TODO: Log error
print e print e
time.sleep(5) time.sleep(5)
continue # Retry continue # Retry
if response.status_code == 403:
logger.send(msgpack.packb({"component": "retrieve", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."}))
time.sleep(600)
continue # Retry
break # Done break # Done
item["retrieval_time"] = int(time.time()) item["retrieval_time"] = int(time.time())

@@ -16,6 +16,11 @@ while True:
time.sleep(30) time.sleep(30)
continue continue
if "temporarily blocked your computer" in page:
logger.send(msgpack.packb({"component": "scrape", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."}))
time.sleep(600)
continue
basetime = int(time.time()) basetime = int(time.time())
xml = lxml.html.fromstring(page) xml = lxml.html.fromstring(page)

Loading…
Cancel
Save