You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
51 lines
1.5 KiB
Python
51 lines
1.5 KiB
Python
import datetime, json, sys
|
|
import requests, oursql
|
|
import shared
|
|
|
|
class Coursera(shared.Scraper):
    """Scraper that imports Coursera topics and the courses listed under them.

    The topic list is downloaded from the Coursera "maestro" API. Each topic
    is stored through ``insert_topic`` and each of its courses through
    ``insert_item``. Those helpers, ``self.env`` and ``self.COURSE`` are not
    defined in this class (presumably provided by ``shared.Scraper``).
    """

    # Numeric identifier of this provider.
    # NOTE(review): presumably consumed by shared.Scraper -- confirm.
    provider_id = 2

    def run(self):
        """Download the dataset, then process every entry in it."""
        self.retrieve_dataset()
        self.parse_dataset()

    def retrieve_dataset(self):
        """Fetch the full topic list and keep the decoded JSON in ``self.dataset``."""
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # the scraper indefinitely -- consider adding one.
        self.dataset = requests.get("https://www.coursera.org/maestro/api/topic/list?full=1").json()

    def parse_dataset(self):
        """Process each entry of ``self.dataset`` in order."""
        for item in self.dataset:
            self.process_item(item)

    def process_item(self, item):
        """Store one topic and then every course listed under it.

        ``item`` is a mapping read via the keys ``id``, ``name``,
        ``short_description`` and ``courses``.
        """
        inserted, rowid = self.insert_topic(
            str(item["id"]),
            item["name"],
            description=item["short_description"],
            needs_enrollment=True,
        )

        if inserted:
            self.env.log("Inserted topic %s" % item["name"])
        else:
            self.env.log("Skipped topic %s" % item["name"])

        # Courses are processed whether or not the topic itself was newly
        # inserted; they are attached to the row id returned above.
        for course in item["courses"]:
            self.process_course(course, rowid)

    def process_course(self, course, topicid):
        """Store one course under the topic identified by ``topicid``.

        ``course`` is a mapping read via the keys ``id``, ``name``,
        ``home_link``, ``certificate_description``, ``start_year``,
        ``start_month`` and ``start_day``.
        """
        try:
            start_date = datetime.datetime(course["start_year"], course["start_month"], course["start_day"])
        except (TypeError, ValueError):
            # TypeError: a component is not an integer (e.g. None).
            # ValueError: the components are integers but do not form a valid
            # calendar date. Either way the start date is treated as unknown.
            start_date = None

        title = self.generate_title(course['name'], start_date)

        inserted, itemid = self.insert_item(
            str(course["id"]),
            title,
            course["home_link"],
            has_topic=True,
            itemtype=self.COURSE,
            description=course["certificate_description"],
            start_date=start_date,
            topic_id=topicid,
        )

        if inserted:
            self.env.log("Inserted item %s" % title)
        else:
            self.env.log("Skipped item %s" % title)

    def generate_title(self, name, date):
        """Return ``name`` suffixed with its start date.

        When ``date`` is None the suffix is "(date undetermined)"; otherwise
        it is "(starting <Mon DD, YYYY>)" as formatted by ``strftime``.
        """
        if date is None:
            return "%s (date undetermined)" % name

        return "%s (starting %s)" % (name, date.strftime("%b %d, %Y"))
|
|
|