From 0a2d4fcb9fc5433811c12b15df29c52476bfad04 Mon Sep 17 00:00:00 2001 From: Sven Slootweg Date: Wed, 17 Oct 2012 02:57:41 +0200 Subject: [PATCH] Skip a server when it cannot be reached --- crawler/calibre.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crawler/calibre.py b/crawler/calibre.py index 03474d2..35897ed 100644 --- a/crawler/calibre.py +++ b/crawler/calibre.py @@ -97,8 +97,13 @@ class CalibreCrawler(threading.Thread): base = url_matcher.match(url).group(1) print "[calibr] Starting crawl on %s ..." % url - - response = urllib2.urlopen(url + "browse/category/allbooks") + + try: + response = urllib2.urlopen(url + "browse/category/allbooks") + except URLError: + print "Skipping %s, as the server could not be successfully reached." % url + return None + page_contents = response.read() matcher = re.compile("
")