diff --git a/retrieve.py b/retrieve.py index 398ab5a..61e8a3e 100644 --- a/retrieve.py +++ b/retrieve.py @@ -13,12 +13,19 @@ while True: while True: # We want to keep trying until it succeeds... try: - paste = requests.get("http://pastebin.com/raw.php?i=%s" % item["id"]).text + response = requests.get("http://pastebin.com/raw.php?i=%s" % item["id"]) + paste = response.text except Exception, e: # TODO: Log error print e time.sleep(5) continue # Retry + + if response.status_code == 403: + logger.send(msgpack.packb({"component": "retrieve", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."})) + time.sleep(600) + continue # Retry + break # Done item["retrieval_time"] = int(time.time()) diff --git a/scrape.py b/scrape.py index 528e0b0..ff89d31 100644 --- a/scrape.py +++ b/scrape.py @@ -16,6 +16,11 @@ while True: time.sleep(30) continue + if "temporarily blocked your computer" in page: + logger.send(msgpack.packb({"component": "scrape", "timestamp": int(time.time()), "message": "Got throttled, sleeping..."})) + time.sleep(600) + continue + basetime = int(time.time()) xml = lxml.html.fromstring(page)