From 703a34bfa26e9416c5e2db023dee9499c101b0f2 Mon Sep 17 00:00:00 2001
From: Sven Slootweg <jamsoftgamedev@gmail.com>
Date: Sun, 27 Jan 2013 23:06:32 +0100
Subject: [PATCH] Reorganize updater code and add first design idea for
 frontend

---
 .gitignore             |   1 +
 frontend/index.html    |  25 ++++++
 frontend/style.css     |  57 ++++++++++++++
 updater/lib.py         |  85 ++++++++++++++++++++
 updater/update.py      | 171 -----------------------------------------
 updater/update_khan.py | 131 +++++++++++++++++++++++++++++++
 6 files changed, 299 insertions(+), 171 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 frontend/index.html
 create mode 100644 frontend/style.css
 create mode 100644 updater/lib.py
 delete mode 100644 updater/update.py
 create mode 100644 updater/update_khan.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0d20b64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/frontend/index.html b/frontend/index.html
new file mode 100644
index 0000000..ee85385
--- /dev/null
+++ b/frontend/index.html
@@ -0,0 +1,25 @@
+<!doctype html>
+<html>
+	<head>
+		<title>learn.cryto.net</title>
+		<link rel="stylesheet" href="style.css">
+		<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.9.0/jquery.min.js"></script> <!-- loaded over HTTPS: a plain-HTTP script is blocked as mixed content on secure pages -->
+		<script>
+			$(function(){ // runs once the DOM is ready
+				$("input").keypress(function(){ // any key press switches the search box to its compact layout
+					$(this).parent().removeClass("search-large").addClass("search-top");
+				});
+			});
+		</script>
+	</head>
+	<body>
+		<div class="header">
+			<h1><strong>learn.cryto.net</strong> :: Learn something new!</h1>
+		</div>
+		<div class="main">
+			<div class="search-large"> <!-- starts in the large layout; the script above swaps the class -->
+				I want to learn about <input type="text">.
+			</div>
+		</div>
+	</body>
+</html>
diff --git a/frontend/style.css b/frontend/style.css
new file mode 100644
index 0000000..75681ed
--- /dev/null
+++ b/frontend/style.css
@@ -0,0 +1,57 @@
+body /* page defaults: no outer spacing, sans-serif text */
+{
+	background-color: #F3FFF7;
+	padding: 0px;
+	margin: 0px;
+	font-family: sans-serif;
+}
+
+.header /* title bar; the first element in the body of index.html */
+{
+	background-color: #C9F9DF;
+	color: #009B53;
+	padding: 12px 14px;
+}
+
+.header h1 /* drop the default heading margin and bold weight */
+{
+	margin: 0px;
+	font-weight: normal;
+}
+
+.search-large /* initial state: large search prompt, centered horizontally */
+{
+	color: #006824;
+	width: 960px;
+	text-align: center;
+	margin: 180px auto;
+	font-size: 42px;
+}
+
+.search-top /* compact state; the script in index.html applies it on a key press */
+{
+	color: #006824;
+	width: 960px;
+	margin: 16px;
+	font-size: 26px;
+}
+
+.search-large input, .search-top input /* shared look: borderless, transparent field with only an underline */
+{
+	color: #006824;
+	border: 0px;
+	background-color: transparent;
+	border-bottom: 2px solid #1FDF62;
+}
+
+.search-large input /* field sized to match the large prompt text */
+{
+	font-size: 42px;
+	width: 300px;
+}
+
+.search-top input /* field sized to match the compact prompt text */
+{
+	font-size: 26px;
+	width: 180px;
+}
diff --git a/updater/lib.py b/updater/lib.py
new file mode 100644
index 0000000..ab7f9d4
--- /dev/null
+++ b/updater/lib.py
@@ -0,0 +1,85 @@
+import datetime, oursql
+
+class Database(object):
+	TOPIC = 1
+	COURSE = 2
+	VIDEO = 3
+	ARTICLE = 4
+	EXERCISE = 5
+	QUIZ = 6
+	TEST = 7
+	BOOK = 8
+	AUDIOBOOK = 9
+	
+	def __init__(self, host, user, password=None, database="learn"):
+		self.database = oursql.connect(host=host, user=user, db=database)
+	
+	def insert_topic(self, provider, unique_id, title, override=False, **kwargs):
+		defaults = {
+			"needs_enrollment": False,
+			"creation_date": None,
+			"start_date": None,
+			"end_date": None,
+			"parent_id": 0,
+			"description": ""
+		}
+		
+		for kwarg, val in defaults.iteritems():
+			try:
+				if kwargs[kwarg] == None:
+					kwargs[kwarg] = defaults[kwarg]
+			except KeyError, e:
+				kwargs[kwarg] = defaults[kwarg]
+		
+		c = self.database.cursor()
+		
+		if override == True:
+			exists = False
+		else:
+			c.execute("SELECT `Id` FROM topics WHERE `Provider` = ? AND `ProviderId` = ? LIMIT 1", (provider, unique_id))
+			results = c.fetchall()
+			exists = (len(results) > 0)
+			
+		if exists == True:
+			return (False, results[0][0])
+		else:
+			c.execute("INSERT INTO topics (`ParentId`, `Provider`, `ProviderId`, `Title`, `Description`, `Created`, `NeedsEnrollment`, `StartDate`, `EndDate`)"
+				  "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", (kwargs['parent_id'], provider, unique_id, title, kwargs['description'], kwargs['creation_date'], 
+				                                         kwargs['needs_enrollment'], kwargs['start_date'], kwargs['end_date']))
+			
+			return (True, c.lastrowid)
+	
+	def insert_item(self, provider, unique_id, has_topic, itemtype, title, item_url, override=False, **kwargs):
+		defaults = {
+			"views": None,
+			"source_url": item_url,
+			"topic_id": 0,
+			"parent_id": 0,
+			"description": "",
+			"date": None
+		}
+		
+		for kwarg, val in defaults.iteritems():
+			try:
+				if kwargs[kwarg] == None:
+					kwargs[kwarg] = defaults[kwarg]
+			except KeyError, e:
+				kwargs[kwarg] = defaults[kwarg]
+		
+		c = self.database.cursor()
+		
+		if override == True:
+			exists = False
+		else:
+			c.execute("SELECT `Id` FROM items WHERE `Provider` = ? AND `ProviderId` = ? LIMIT 1", (provider, unique_id))
+			results = c.fetchall()
+			exists = (len(results) > 0)
+			
+		if exists == True:
+			return (False, results[0][0])
+		else:
+			c.execute("INSERT INTO items (`HasTopic`, `Type`, `Provider`, `ProviderId`, `Title`, `Description`, `ItemUrl`, `SourceUrl`, `Views`, `TopicId`, `ParentId`, `Date`)"
+				  "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (has_topic, itemtype, provider, unique_id, title, kwargs["description"], item_url, kwargs["source_url"], 
+									       kwargs["views"], kwargs["topic_id"], kwargs["parent_id"], kwargs["date"]))
+			
+			return (True, c.lastrowid)
diff --git a/updater/update.py b/updater/update.py
deleted file mode 100644
index ce6d256..0000000
--- a/updater/update.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import requests
-import oursql
-import datetime
-import json
-
-database = oursql.connect(host="localhost", user="root", db="learn")
-
-def unicodedammit(input_string):
-	if isinstance(input_string, str):
-		return input_string.decode('utf-8')
-	else:
-		return input_string
-
-class KhanUniversityCrawler(object):
-	TOPIC = 1
-	COURSE = 2
-	VIDEO = 3
-	ARTICLE = 4
-	EXERCISE = 5
-	QUIZ = 6
-	TEST = 7
-	BOOK = 8
-	
-	def __init__(self):
-		pass
-		
-	def retrieve_dataset(self):
-		#self.dataset = requests.get("http://www.khanacademy.org/api/v1/topictree").json()
-		self.dataset = json.loads(open("data.json", "r").read())
-
-	def parse_dataset(self):
-		self.process_item(self.dataset, 0)
-		
-	def process_item(self, item, level, parent=None):
-		global database
-		
-		c = database.cursor()
-		
-		try:
-			kind = item["kind"]
-		except KeyError, e:
-			return
-		
-		if kind == "Topic":
-			unique_id = item["id"]
-			
-			try:
-				parent_id = parent["_cl_id"]
-			except TypeError, e:
-				parent_id = 0
-				
-			if item["description"] is not None:
-				description = item["description"]
-			else:
-				description = ""
-				
-			if item["title"] is not None:
-				title = item["title"]
-			else:
-				title = ""
-			
-			c.execute("SELECT `Id` FROM topics WHERE `ProviderId` = ? LIMIT 1", (unique_id,))
-			results = c.fetchall()
-			exists = (len(results) > 0)
-			
-			if not exists:
-				c.execute("INSERT INTO topics (`ParentId`, `Provider`, `ProviderId`, `Title`, `Description`, `Created`, `NeedsEnrollment`)"
-					  "VALUES (?, 1, ?, ?, ?, ?, 0)", (parent_id, unique_id, title, description, datetime.datetime.now()))
-				
-				print "Inserted topic %s" % title
-				
-				item["_cl_id"] = c.lastrowid
-			else:
-				print u"Skipped topic %s" % title
-				item["_cl_id"] = results[0][0]
-		elif kind in ("Video", "Exercise", "Article"):
-			try:
-				unique_id = item["readable_id"]
-			except KeyError, e:
-				try:
-					unique_id = item["name"]
-				except KeyError, e:
-					try:
-						unique_id = str(item["id"])
-					except KeyError, e:
-						print repr(item)
-						sys.stderr.write("WARNING: No suitable identifier found for item\n")
-						raise
-						return
-					
-			if item["kind"] == "Video":
-				itemtype = self.VIDEO
-			elif item["kind"] == "Exercise":
-				itemtype = self.EXERCISE
-			elif item["kind"] == "Article":
-				itemtype = self.ARTICLE
-				
-			try:
-				source_url = item["ka_url"]
-			except KeyError, e:
-				if itemtype == self.ARTICLE:
-					source_url = ""
-				else:
-					return
-				
-			try:
-				item_url = item["url"]
-			except KeyError, e:
-				item_url = source_url
-				
-			if itemtype == self.ARTICLE:
-				description = item["content"]
-			else:
-				try:
-					description = item["description"]
-				except KeyError, e:
-					description = ""
-					
-			if description is None:
-				description = ""
-			
-			try:
-				title = item["title"]
-			except KeyError, e:
-				try:
-					title = item["display_name"]
-				except KeyError, e:
-					title = "Untitled"
-				
-			try:
-				views = item["views"]
-			except KeyError, e:
-				views = 0
-			
-			c.execute("SELECT `Id` FROM items WHERE `ProviderId` = ? LIMIT 1", (unique_id,))
-			results = c.fetchall()
-			exists = (len(results) > 0)
-			
-			if not exists:
-				try:
-					c.execute("INSERT INTO items (`HasTopic`, `Type`, `Provider`, `ProviderId`, `Title`, `Description`, `ItemUrl`, `SourceUrl`, `Views`, `TopicId`, `ParentId`)"
-						  "VALUES (1, ?, 1, ?, ?, ?, ?, ?, ?, ?, 0)", (itemtype, unique_id, title, description, item_url, source_url, views, parent["_cl_id"]))
-				except oursql.ProgrammingError, e:
-					print repr((itemtype, unique_id, title, description, item_url, source_url, views, parent["_cl_id"]))
-					print repr(description)
-					raise
-				
-				print "Inserted item %s" % title
-				
-				item["_cl_id"] = c.lastrowid
-			else:
-				print "Skipped item %s" % title
-				item["_cl_id"] = results[0][0]
-		elif kind == "Separator":
-			pass  # Ignore separators
-		else:
-			print "Unrecognized kind: %s" % item["kind"]
-			print repr(item)
-			date = datetime.datetime.strptime("2008-08-12T12:20:30Z", "%Y-%m-%dT%H:%M:%SZ")
-		
-		try:
-			children = item["children"]
-		except KeyError, e:
-			pass
-		else:
-			for child in children:
-				self.process_item(child, level + 1, item)
-			
-crawler = KhanUniversityCrawler()
-crawler.retrieve_dataset()
-crawler.parse_dataset()
diff --git a/updater/update_khan.py b/updater/update_khan.py
new file mode 100644
index 0000000..8cc5dfe
--- /dev/null
+++ b/updater/update_khan.py
@@ -0,0 +1,131 @@
+import requests
+import oursql
+import datetime
+import json
+import lib
+
+class KhanUniversityCrawler(object):
+	def __init__(self):
+		self.db = lib.Database("localhost", "root")
+		
+	def retrieve_dataset(self):
+		self.dataset = requests.get("http://www.khanacademy.org/api/v1/topictree").json()
+		#self.dataset = json.loads(open("data.json", "r").read())
+
+	def parse_dataset(self):
+		self.process_item(self.dataset, 0)
+		
+	def process_item(self, item, level, parent=None):
+		try:
+			kind = item["kind"]
+		except KeyError, e:
+			return
+		
+		if kind == "Topic":
+			unique_id = item["id"]
+			
+			try:
+				parent_id = parent["_cl_id"]
+			except TypeError, e:
+				parent_id = 0
+				
+			if item["title"] is not None:
+				title = item["title"]
+			else:
+				title = ""
+			
+			inserted, rowid = self.db.insert_topic(1, unique_id, title, description=item["description"], needs_enrollment=False)
+			item["_cl_id"] = rowid
+			
+			if inserted:
+				print "Inserted %s" % title
+			else:
+				print "Skipped %s" % title
+		elif kind in ("Video", "Exercise", "Article"):
+			try:
+				unique_id = item["readable_id"]
+			except KeyError, e:
+				try:
+					unique_id = item["name"]
+				except KeyError, e:
+					try:
+						unique_id = str(item["id"])
+					except KeyError, e:
+						print repr(item)
+						sys.stderr.write("WARNING: No suitable identifier found for item\n")
+						raise
+						return
+					
+			if item["kind"] == "Video":
+				itemtype = self.db.VIDEO
+			elif item["kind"] == "Exercise":
+				itemtype = self.db.EXERCISE
+			elif item["kind"] == "Article":
+				itemtype = self.db.ARTICLE
+				
+			try:
+				source_url = item["ka_url"]
+			except KeyError, e:
+				if itemtype == self.db.ARTICLE:
+					source_url = ""
+				else:
+					return
+				
+			try:
+				item_url = item["url"]
+			except KeyError, e:
+				try:
+					item_url = item["ka_url"]
+				except KeyError, e:
+					item_url = None
+				
+			if itemtype == self.db.ARTICLE:
+				description = item["content"]
+			else:
+				try:
+					description = item["description"]
+				except KeyError, e:
+					description = None
+			
+			try:
+				title = item["title"]
+			except KeyError, e:
+				try:
+					title = item["display_name"]
+				except KeyError, e:
+					title = "Untitled"
+				
+			try:
+				views = item["views"]
+			except KeyError, e:
+				views = None
+				
+			try:
+				date = datetime.datetime.strptime(item["date_added"], "%Y-%m-%dT%H:%M:%SZ")
+			except KeyError, e:
+				date = None
+			
+			inserted, rowid = self.db.insert_item(1, unique_id, True, itemtype, title, item_url, source_url=source_url, description=description, views=views, topic_id=parent["_cl_id"], date=date)
+			item["_cl_id"] = rowid
+			
+			if inserted:
+				print "Inserted %s" % title
+			else:
+				print "Skipped %s" % title
+		elif kind == "Separator":
+			pass  # Ignore separators
+		else:
+			sys.stderr.write("Unrecognized kind: %s\n" % item["kind"])
+			sys.stderr.write("%s\n" % (repr(item)))
+		
+		try:
+			children = item["children"]
+		except KeyError, e:
+			pass
+		else:
+			for child in children:
+				self.process_item(child, level + 1, item)
+			
+crawler = KhanUniversityCrawler()  # opens the database connection
+crawler.retrieve_dataset()  # downloads the topic tree from the Khan Academy API
+crawler.parse_dataset()  # walks the tree and stores its topics and items