"use strict"; const cheerio = require("cheerio"); const url = require("url"); const assureResponse = require("../../shared/assure-response"); const getUntaggedText = require("../../shared/get-untagged-text"); module.exports = function scrapeProduct({ session }) { return async function({ data, createItem, updateData, expireDependents }) { if (data.productFolderUrl == null) { throw new Error(`No known product page URL`); } let productPageURL = url.resolve("https://www.st.com/", data.productFolderUrl); let response = await session.get(productPageURL); assureResponse(response); let $ = cheerio.load(response.body); let datasheetLink = $("a[data-js='datasheetLink']").attr("href"); let datasheetURL = (datasheetLink != null) ? url.resolve(productPageURL, datasheetLink) : null; let resources = $(".st-table--resources") .find("h3").toArray() .map((heading) => { let $heading = $(heading); let sectionID = $heading.attr("id"); let sectionTitle = $heading.text().trim(); let $table = $heading.next("table"); let items = $table .find("tbody tr").toArray() .map((row) => { let $row = $(row); let $mainView = $row.find(".visible-on-desktop-only"); let $link = $mainView.find("a.st-link"); return { url: url.resolve(productPageURL, $link.attr("href")), documentID: $link.find("span.st-font--bold").text().trim(), description: $link.find("span:not(.st-font--bold)").text().trim(), version: getUntaggedText($link), date: $row.find(".visible-on-desktop-only[data-latest-update]").text().trim() }; }); return { sectionID: sectionID, sectionTitle: sectionTitle, items: items }; }); updateData((data) => { return { ... data, datasheetLink: datasheetURL, resources: resources }; }); expireDependents(); for (let section of resources) { for (let resource of section.items) { createItem({ id: `st:resource:${resource.url}`, tags: (resource.url === datasheetURL) ? [ "st:resource", "st:datasheet" ] : [ "st_resource" ], data: { url: resource.url } }); } } }; };