Deal with throttling properly
This commit is contained in:
parent
73fa91c953
commit
68e82f8ac8
|
@ -13,12 +13,19 @@ while True:
|
|||
|
||||
# Fetch the raw paste for the current item, retrying until a usable response
# arrives. Two failure modes are handled, each with its own back-off:
#   * the request raises (connection error, timeout, ...) -> sleep 5 seconds;
#   * the server answers 403, which this script treats as being throttled
#     -> report it through `logger` and sleep 10 minutes.
# NOTE(review): any other non-200 status falls through to `break`, so its
# response body is kept as the paste -- confirm that this is intended.
while True:  # We want to keep trying until it succeeds...
    try:
        # The explicit timeout keeps a stalled connection from hanging the
        # retriever forever; a timeout raises and is retried like any other
        # request failure.
        response = requests.get("http://pastebin.com/raw.php?i=%s" % item["id"], timeout=30)
        paste = response.text
    except Exception as e:
        # TODO: Log error
        print(e)
        time.sleep(5)
        continue  # Retry

    if response.status_code == 403:
        logger.send(msgpack.packb({"component": "retrieve", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."}))
        time.sleep(600)
        continue  # Retry

    break  # Done

# Record when the paste was successfully retrieved (Unix time, whole seconds).
item["retrieval_time"] = int(time.time())
|
||||
|
|
|
@ -16,6 +16,11 @@ while True:
|
|||
time.sleep(30)
|
||||
continue
|
||||
|
||||
if "temporarily blocked your computer" in page:
|
||||
logger.send(msgpack.packb({"component": "scrape", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."}))
|
||||
time.sleep(600)
|
||||
continue
|
||||
|
||||
basetime = int(time.time())
|
||||
|
||||
xml = lxml.html.fromstring(page)
|
||||
|
|
Loading…
Reference in a new issue