Compare commits

master..rewrite

No commits in common. 'master' and 'rewrite' have entirely different histories.

@@ -1,13 +1,8 @@
#!/usr/bin/python #!/usr/bin/python
import os, time, sys, urllib, urllib2, threading, re import os, time, sys, json, urllib, urllib2, threading, re
from collections import deque from collections import deque
from BeautifulSoup import BeautifulSoup from BeautifulSoup import BeautifulSoup
try:
import json
except:
import simplejson as json
STOP = False STOP = False
pipe_name = 'pipe_books' pipe_name = 'pipe_books'
@@ -79,7 +74,7 @@ class GoogleCrawler (threading.Thread):
print "[google] == SLEEPING..." print "[google] == SLEEPING..."
time.sleep(4) time.sleep(4)
self.crawl_page("http://www.google.com" + next_url) #self.crawl_page("http://www.google.com" + next_url)
class CalibreCrawler(threading.Thread): class CalibreCrawler(threading.Thread):
def run(self): def run(self):
@@ -97,13 +92,8 @@ class CalibreCrawler(threading.Thread):
base = url_matcher.match(url).group(1) base = url_matcher.match(url).group(1)
print "[calibr] Starting crawl on %s ..." % url print "[calibr] Starting crawl on %s ..." % url
try: response = urllib2.urlopen(url + "browse/category/allbooks")
response = urllib2.urlopen(url + "browse/category/allbooks")
except urllib2.URLError:
print "Skipping %s, as the server could not be successfully reached." % url
return None
page_contents = response.read() page_contents = response.read()
matcher = re.compile("<div class=\"load_data\" title=\"([\[\]0-9\s,]*)\">") matcher = re.compile("<div class=\"load_data\" title=\"([\[\]0-9\s,]*)\">")

@@ -1,10 +1,5 @@
#!/usr/bin/python #!/usr/bin/python
import os, time, sys, _mysql import os, time, sys, json, _mysql
try:
import json
except:
import simplejson as json
def stringdammit(input_string): def stringdammit(input_string):
if isinstance(input_string, str): if isinstance(input_string, str):
@@ -67,5 +62,3 @@ while True:
print "Skipped '%s' by '%s' (already exists)" % (s_title, s_authors) print "Skipped '%s' by '%s' (already exists)" % (s_title, s_authors)
else: else:
print "Unrecognized command: %s" % message_type print "Unrecognized command: %s" % message_type
time.sleep(0.05)

Loading…
Cancel
Save