import requests
import oursql
import datetime
import json

import lib


class CourseraCrawler(object):
    def __init__(self):
        # lib.Database wraps database access; insert_topic/insert_item
        # return an (inserted, row id) pair.
        self.db = lib.Database("localhost", "root")

    def retrieve_dataset(self):
        # Live API call, disabled in favour of a locally cached dump:
        #self.dataset = requests.get("https://www.coursera.org/maestro/api/topic/list?full=1").json()
        self.dataset = json.loads(open("coursera.json", "r").read())

    def parse_dataset(self):
        for item in self.dataset:
            self.process_item(item)

    def process_item(self, item):
        # Insert the topic itself, then every course that belongs to it.
        inserted, rowid = self.db.insert_topic(2, str(item["id"]), item["name"],
                                               description=item["short_description"],
                                               needs_enrollment=True)

        if inserted:
            print "Inserted %s" % item["name"]
        else:
            print "Skipped %s" % item["name"]

        for course in item["courses"]:
            self.process_course(course, rowid)

    def process_course(self, course, topicid):
        # Courses without a confirmed start date have None date fields,
        # which makes the datetime constructor raise a TypeError.
        try:
            start_date = datetime.datetime(course["start_year"], course["start_month"],
                                           course["start_day"])
            title = "%s: %s-%s-%s" % (course["name"],
                                      str(course["start_year"]).zfill(4),
                                      str(course["start_month"]).zfill(2),
                                      str(course["start_day"]).zfill(2))
        except TypeError:
            start_date = None
            title = "%s (date undetermined)" % (course["name"])

        inserted, itemid = self.db.insert_item(2, str(course["id"]), True, self.db.COURSE,
                                               title, course["home_link"],
                                               description=course["certificate_description"],
                                               start_date=start_date, topic_id=topicid)

        if inserted:
            print "\tInserted %s" % title
        else:
            print "\tSkipped %s" % title


crawler = CourseraCrawler()
crawler.retrieve_dataset()
crawler.parse_dataset()