You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

51 lines
1.5 KiB
Python

import datetime, json, sys
import requests, oursql
import shared
class Coursera(shared.Scraper):
    """Scraper that imports Coursera's topic list and each topic's courses.

    Uses ``insert_topic``, ``insert_item``, ``COURSE`` and ``env``, none of
    which are defined in this class; presumably they are provided by
    ``shared.Scraper`` -- confirm against that module.
    """

    # Numeric identifier of this provider. NOTE(review): presumably consumed by
    # shared.Scraper when storing rows -- confirm.
    provider_id = 2

    def run(self):
        """Download the topic list, then process every entry in it."""
        self.retrieve_dataset()
        self.parse_dataset()

    def retrieve_dataset(self):
        """Fetch the full topic list from the Coursera API into ``self.dataset``.

        Raises:
            requests.RequestException: if the connection fails, the request
                times out, or the server answers with an HTTP error status.
        """
        # Without a timeout a stalled connection would block the scraper forever.
        response = requests.get(
            "https://www.coursera.org/maestro/api/topic/list?full=1",
            timeout=60,
        )
        # Fail loudly on 4xx/5xx instead of trying to iterate an error payload.
        response.raise_for_status()
        self.dataset = response.json()

    def parse_dataset(self):
        """Process each topic entry of the previously retrieved dataset."""
        for item in self.dataset:
            self.process_item(item)

    def process_item(self, item):
        """Insert one topic and then all of the courses listed under it.

        Args:
            item: Mapping for a single topic; the keys ``id``, ``name``,
                ``short_description`` and ``courses`` are read.
        """
        inserted, rowid = self.insert_topic(
            str(item["id"]),
            item["name"],
            description=item["short_description"],
            needs_enrollment=True,
        )

        if inserted:
            self.env.log("Inserted topic %s" % item["name"])
        else:
            self.env.log("Skipped topic %s" % item["name"])

        # Courses are processed whether or not the topic itself was newly inserted.
        for course in item["courses"]:
            self.process_course(course, rowid)

    def process_course(self, course, topicid):
        """Insert a single course as an item attached to the given topic.

        Args:
            course: Mapping for one course; the keys ``start_year``,
                ``start_month``, ``start_day``, ``name``, ``id``,
                ``home_link`` and ``certificate_description`` are read.
            topicid: Second value returned by ``insert_topic`` for the owning
                topic; passed on as ``topic_id``.
        """
        try:
            start_date = datetime.datetime(
                course["start_year"], course["start_month"], course["start_day"]
            )
        except (TypeError, ValueError):
            # TypeError: a date part is missing (e.g. None).
            # ValueError: a date part is out of range (e.g. month 0).
            # Either way the start date is unknown; do not abort the scrape.
            start_date = None

        title = self.generate_title(course["name"], start_date)

        inserted, itemid = self.insert_item(
            str(course["id"]),
            title,
            course["home_link"],
            has_topic=True,
            itemtype=self.COURSE,
            description=course["certificate_description"],
            start_date=start_date,
            topic_id=topicid,
        )

        if inserted:
            self.env.log("Inserted item %s" % title)
        else:
            self.env.log("Skipped item %s" % title)

    def generate_title(self, name, date):
        """Return the course name suffixed with its start date.

        Args:
            name: Course name.
            date: ``datetime`` of the course start, or ``None`` when unknown.

        Returns:
            ``"<name> (starting <Mon DD, YYYY>)"`` when a date is given,
            otherwise ``"<name> (date undetermined)"``.
        """
        if date is None:
            return "%s (date undetermined)" % name
        return "%s (starting %s)" % (name, date.strftime("%b %d, %Y"))