diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0d20b64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
diff --git a/frontend/index.html b/frontend/index.html
new file mode 100644
index 0000000..ee85385
--- /dev/null
+++ b/frontend/index.html
@@ -0,0 +1,25 @@
+ I want to learn about .
diff --git a/frontend/style.css b/frontend/style.css
new file mode 100644
index 0000000..75681ed
--- /dev/null
+++ b/frontend/style.css
@@ -0,0 +1,57 @@
+ background-color: #F3FFF7;
+ padding: 0px;
+ margin: 0px;
+ font-family: sans-serif;
+ background-color: #C9F9DF;
+ color: #009B53;
+ padding: 12px 14px;
+.header h1
+ margin: 0px;
+ font-weight: normal;
+ color: #006824;
+ width: 960px;
+ text-align: center;
+ margin: 180px auto;
+ font-size: 42px;
+ color: #006824;
+ width: 960px;
+ margin: 16px;
+ font-size: 26px;
+.search-large input, .search-top input
+ color: #006824;
+ border: 0px;
+ background-color: transparent;
+ border-bottom: 2px solid #1FDF62;
+.search-large input
+ font-size: 42px;
+ width: 300px;
+.search-top input
+ font-size: 26px;
+ width: 180px;
diff --git a/updater/lib.py b/updater/lib.py
new file mode 100644
index 0000000..ab7f9d4
--- /dev/null
+++ b/updater/lib.py
@@ -0,0 +1,85 @@
+import datetime, oursql
+class Database(object):
+ TOPIC = 1
+ COURSE = 2
+ VIDEO = 3
+ QUIZ = 6
+ TEST = 7
+ BOOK = 8
+ def __init__(self, host, user, password=None, database="learn"):
+ self.database = oursql.connect(host=host, user=user, db=database)
+ def insert_topic(self, provider, unique_id, title, override=False, **kwargs):
+ defaults = {
+ "needs_enrollment": False,
+ "creation_date": None,
+ "start_date": None,
+ "end_date": None,
+ "parent_id": 0,
+ "description": ""
+ }
+ for kwarg, val in defaults.iteritems():
+ try:
+ if kwargs[kwarg] == None:
+ kwargs[kwarg] = defaults[kwarg]
+ except KeyError, e:
+ kwargs[kwarg] = defaults[kwarg]
+ c = self.database.cursor()
+ if override == True:
+ exists = False
+ else:
+ c.execute("SELECT `Id` FROM topics WHERE `Provider` = ? AND `ProviderId` = ? LIMIT 1", (provider, unique_id))
+ results = c.fetchall()
+ exists = (len(results) > 0)
+ if exists == True:
+ return (False, results[0][0])
+ else:
+ c.execute("INSERT INTO topics (`ParentId`, `Provider`, `ProviderId`, `Title`, `Description`, `Created`, `NeedsEnrollment`, `StartDate`, `EndDate`)"
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", (kwargs['parent_id'], provider, unique_id, title, kwargs['description'], kwargs['creation_date'],
+ kwargs['needs_enrollment'], kwargs['start_date'], kwargs['end_date']))
+ return (True, c.lastrowid)
+ def insert_item(self, provider, unique_id, has_topic, itemtype, title, item_url, override=False, **kwargs):
+ defaults = {
+ "views": None,
+ "source_url": item_url,
+ "topic_id": 0,
+ "parent_id": 0,
+ "description": "",
+ "date": None
+ }
+ for kwarg, val in defaults.iteritems():
+ try:
+ if kwargs[kwarg] == None:
+ kwargs[kwarg] = defaults[kwarg]
+ except KeyError, e:
+ kwargs[kwarg] = defaults[kwarg]
+ c = self.database.cursor()
+ if override == True:
+ exists = False
+ else:
+ c.execute("SELECT `Id` FROM items WHERE `Provider` = ? AND `ProviderId` = ? LIMIT 1", (provider, unique_id))
+ results = c.fetchall()
+ exists = (len(results) > 0)
+ if exists == True:
+ return (False, results[0][0])
+ else:
+ c.execute("INSERT INTO items (`HasTopic`, `Type`, `Provider`, `ProviderId`, `Title`, `Description`, `ItemUrl`, `SourceUrl`, `Views`, `TopicId`, `ParentId`, `Date`)"
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (has_topic, itemtype, provider, unique_id, title, kwargs["description"], item_url, kwargs["source_url"],
+ kwargs["views"], kwargs["topic_id"], kwargs["parent_id"], kwargs["date"]))
+ return (True, c.lastrowid)
diff --git a/updater/update.py b/updater/update.py
deleted file mode 100644
index ce6d256..0000000
--- a/updater/update.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import requests
-import oursql
-import datetime
-import json
-database = oursql.connect(host="localhost", user="root", db="learn")
-def unicodedammit(input_string):
- if isinstance(input_string, str):
- return input_string.decode('utf-8')
- else:
- return input_string
-class KhanUniversityCrawler(object):
- TOPIC = 1
- COURSE = 2
- VIDEO = 3
- QUIZ = 6
- TEST = 7
- BOOK = 8
- def __init__(self):
- pass
- def retrieve_dataset(self):
- #self.dataset = requests.get("http://www.khanacademy.org/api/v1/topictree").json()
- self.dataset = json.loads(open("data.json", "r").read())
- def parse_dataset(self):
- self.process_item(self.dataset, 0)
- def process_item(self, item, level, parent=None):
- global database
- c = database.cursor()
- try:
- kind = item["kind"]
- except KeyError, e:
- return
- if kind == "Topic":
- unique_id = item["id"]
- try:
- parent_id = parent["_cl_id"]
- except TypeError, e:
- parent_id = 0
- if item["description"] is not None:
- description = item["description"]
- else:
- description = ""
- if item["title"] is not None:
- title = item["title"]
- else:
- title = ""
- c.execute("SELECT `Id` FROM topics WHERE `ProviderId` = ? LIMIT 1", (unique_id,))
- results = c.fetchall()
- exists = (len(results) > 0)
- if not exists:
- c.execute("INSERT INTO topics (`ParentId`, `Provider`, `ProviderId`, `Title`, `Description`, `Created`, `NeedsEnrollment`)"
- "VALUES (?, 1, ?, ?, ?, ?, 0)", (parent_id, unique_id, title, description, datetime.datetime.now()))
- print "Inserted topic %s" % title
- item["_cl_id"] = c.lastrowid
- else:
- print u"Skipped topic %s" % title
- item["_cl_id"] = results[0][0]
- elif kind in ("Video", "Exercise", "Article"):
- try:
- unique_id = item["readable_id"]
- except KeyError, e:
- try:
- unique_id = item["name"]
- except KeyError, e:
- try:
- unique_id = str(item["id"])
- except KeyError, e:
- print repr(item)
- sys.stderr.write("WARNING: No suitable identifier found for item\n")
- raise
- return
- if item["kind"] == "Video":
- itemtype = self.VIDEO
- elif item["kind"] == "Exercise":
- itemtype = self.EXERCISE
- elif item["kind"] == "Article":
- itemtype = self.ARTICLE
- try:
- source_url = item["ka_url"]
- except KeyError, e:
- if itemtype == self.ARTICLE:
- source_url = ""
- else:
- return
- try:
- item_url = item["url"]
- except KeyError, e:
- item_url = source_url
- if itemtype == self.ARTICLE:
- description = item["content"]
- else:
- try:
- description = item["description"]
- except KeyError, e:
- description = ""
- if description is None:
- description = ""
- try:
- title = item["title"]
- except KeyError, e:
- try:
- title = item["display_name"]
- except KeyError, e:
- title = "Untitled"
- try:
- views = item["views"]
- except KeyError, e:
- views = 0
- c.execute("SELECT `Id` FROM items WHERE `ProviderId` = ? LIMIT 1", (unique_id,))
- results = c.fetchall()
- exists = (len(results) > 0)
- if not exists:
- try:
- c.execute("INSERT INTO items (`HasTopic`, `Type`, `Provider`, `ProviderId`, `Title`, `Description`, `ItemUrl`, `SourceUrl`, `Views`, `TopicId`, `ParentId`)"
- "VALUES (1, ?, 1, ?, ?, ?, ?, ?, ?, ?, 0)", (itemtype, unique_id, title, description, item_url, source_url, views, parent["_cl_id"]))
- except oursql.ProgrammingError, e:
- print repr((itemtype, unique_id, title, description, item_url, source_url, views, parent["_cl_id"]))
- print repr(description)
- raise
- print "Inserted item %s" % title
- item["_cl_id"] = c.lastrowid
- else:
- print "Skipped item %s" % title
- item["_cl_id"] = results[0][0]
- elif kind == "Separator":
- pass # Ignore separators
- else:
- print "Unrecognized kind: %s" % item["kind"]
- print repr(item)
- date = datetime.datetime.strptime("2008-08-12T12:20:30Z", "%Y-%m-%dT%H:%M:%SZ")
- try:
- children = item["children"]
- except KeyError, e:
- pass
- else:
- for child in children:
- self.process_item(child, level + 1, item)
-crawler = KhanUniversityCrawler()
diff --git a/updater/update_khan.py b/updater/update_khan.py
new file mode 100644
index 0000000..8cc5dfe
--- /dev/null
+++ b/updater/update_khan.py
@@ -0,0 +1,131 @@
+import datetime
+import json
+import sys
+
+import oursql
+import requests
+
+import lib
+class KhanUniversityCrawler(object):
+ def __init__(self):
+ self.db = lib.Database("localhost", "root")
+ def retrieve_dataset(self):
+ self.dataset = requests.get("http://www.khanacademy.org/api/v1/topictree").json()
+ #self.dataset = json.loads(open("data.json", "r").read())
+ def parse_dataset(self):
+ self.process_item(self.dataset, 0)
+ def process_item(self, item, level, parent=None):
+ try:
+ kind = item["kind"]
+ except KeyError, e:
+ return
+ if kind == "Topic":
+ unique_id = item["id"]
+ try:
+ parent_id = parent["_cl_id"]
+ except TypeError, e:
+ parent_id = 0
+ if item["title"] is not None:
+ title = item["title"]
+ else:
+ title = ""
+ inserted, rowid = self.db.insert_topic(1, unique_id, title, description=item["description"], needs_enrollment=False)
+ item["_cl_id"] = rowid
+ if inserted:
+ print "Inserted %s" % title
+ else:
+ print "Skipped %s" % title
+ elif kind in ("Video", "Exercise", "Article"):
+ try:
+ unique_id = item["readable_id"]
+ except KeyError, e:
+ try:
+ unique_id = item["name"]
+ except KeyError, e:
+ try:
+ unique_id = str(item["id"])
+ except KeyError, e:
+ print repr(item)
+ sys.stderr.write("WARNING: No suitable identifier found for item\n")
+ raise
+ return
+ if item["kind"] == "Video":
+ itemtype = self.db.VIDEO
+ elif item["kind"] == "Exercise":
+ itemtype = self.db.EXERCISE
+ elif item["kind"] == "Article":
+ itemtype = self.db.ARTICLE
+ try:
+ source_url = item["ka_url"]
+ except KeyError, e:
+ if itemtype == self.db.ARTICLE:
+ source_url = ""
+ else:
+ return
+ try:
+ item_url = item["url"]
+ except KeyError, e:
+ try:
+ item_url = item["ka_url"]
+ except KeyError, e:
+ item_url = None
+ if itemtype == self.db.ARTICLE:
+ description = item["content"]
+ else:
+ try:
+ description = item["description"]
+ except KeyError, e:
+ description = None
+ try:
+ title = item["title"]
+ except KeyError, e:
+ try:
+ title = item["display_name"]
+ except KeyError, e:
+ title = "Untitled"
+ try:
+ views = item["views"]
+ except KeyError, e:
+ views = None
+ try:
+ date = datetime.datetime.strptime(item["date_added"], "%Y-%m-%dT%H:%M:%SZ")
+ except KeyError, e:
+ date = None
+ inserted, rowid = self.db.insert_item(1, unique_id, True, itemtype, title, item_url, source_url=source_url, description=description, views=views, topic_id=parent["_cl_id"], date=date)
+ item["_cl_id"] = rowid
+ if inserted:
+ print "Inserted %s" % title
+ else:
+ print "Skipped %s" % title
+ elif kind == "Separator":
+ pass # Ignore separators
+ else:
+ sys.stderr.write("Unrecognized kind: %s\n" % item["kind"])
+ sys.stderr.write("%s\n" % (repr(item)))
+ try:
+ children = item["children"]
+ except KeyError, e:
+ pass
+ else:
+ for child in children:
+ self.process_item(child, level + 1, item)
+crawler = KhanUniversityCrawler()