You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
212 lines
5.9 KiB
JavaScript
212 lines
5.9 KiB
JavaScript
"use strict";
|
|
|
|
const cheerio = require("cheerio");
|
|
const url = require("url");
|
|
const assert = require("assert");
|
|
const syncpipe = require("syncpipe");
|
|
const htmlEntities = require("html-entities");
|
|
|
|
const createDatasheet = require("../../shared/create-datasheet");
|
|
const assureResponse = require("../../shared/assure-response");
|
|
const getUntaggedText = require("../../shared/get-untagged-text");
|
|
|
|
// STMicroelectronics
|
|
|
|
/**
 * Extracts the "prmisID" value embedded in a category page.
 *
 * Quick-and-dirty: the page source is scanned with a regular expression for
 * the first `"prmisID":"<value>"` pair; it is not parsed as HTML or JSON.
 *
 * @param {string} string - Raw page source to search.
 * @returns {string} The captured ID (everything up to the next double quote).
 * @throws {Error} When no prmisID pair is present in the input.
 */
function extractID(string) {
	const match = /"prmisID":"([^"]+)"/.exec(string);

	if (match == null) {
		throw new Error("ST: prmis ID expected but not found");
	}

	return match[1];
}
|
|
|
|
module.exports = function ({ session }) {
|
|
return {
|
|
seed: [{
|
|
id: "st:home",
|
|
tags: [ "st:home" ],
|
|
data: {}
|
|
}],
|
|
tags: {
|
|
"st:home": [ "st:findCategories" ],
|
|
"st:category": [ "st:scrapeCategory" ],
|
|
"st:product": [ "st:scrapeProduct", "st:normalizeProduct" ],
|
|
},
|
|
tasks: {
|
|
"st:findCategories": {
|
|
ttl: "15d",
|
|
run: async function ({ createItem }) {
|
|
let response = await session.get("https://www.st.com/content/st_com/en.html");
|
|
assureResponse(response);
|
|
|
|
let $ = cheerio.load(response.body);
|
|
|
|
let links = $("#Top_Menu_Products :is(a.st-nav__blockmenu-title, a.st-nav__blockmenu-link)")
|
|
.toArray()
|
|
.map((element) => $(element).attr("href"))
|
|
.map((relativeURL) => url.resolve("https://www.st.com/", relativeURL));
|
|
|
|
for (let link of links) {
|
|
createItem({
|
|
id: `st:category:${link}`,
|
|
tags: [ "st:category" ],
|
|
data: { url: link }
|
|
});
|
|
}
|
|
}
|
|
},
|
|
"st:scrapeCategory": {
|
|
ttl: "1d",
|
|
taskInterval: "60s",
|
|
version: "2",
|
|
run: async function({ data, createItem }) {
|
|
let response = await session.get(data.url);
|
|
assureResponse(response);
|
|
|
|
let prmisID = extractID(response.body.toString());
|
|
|
|
let listingResponse = await session.get(`https://www.st.com/en/documentation/scraper.cxst-ps-grid.html/${encodeURIComponent(prmisID)}.json`, { noDecode: true });
|
|
assureResponse(response);
|
|
let listingBuffer = listingResponse.body;
|
|
|
|
if (listingBuffer.length > 0) {
|
|
// This is a category that has a product explorer
|
|
let listing = JSON.parse(listingBuffer.toString());
|
|
|
|
let cellNames = listing.columns.map((column) => {
|
|
let cellName = (column.identifier != null)
|
|
? `${column.identifier}_${column.qualifier_identifier}`
|
|
: `nonstandard:${column.name}:${column.qualifier}`
|
|
|
|
createItem({
|
|
id: `st:column:${cellName}`,
|
|
tags: [ "st:column" ],
|
|
data: column
|
|
});
|
|
|
|
return cellName;
|
|
});
|
|
|
|
for (let row of listing.rows) {
|
|
assert(row.productId != null);
|
|
|
|
let cellData = syncpipe(row.cells, [
|
|
(_) => _.map((cell, i) => [ cellNames[i], cell.value ]),
|
|
(_) => Object.fromEntries(_)
|
|
]);
|
|
|
|
createItem({
|
|
id: `st:product:${row.productId}`,
|
|
tags: [ "st:product" ],
|
|
data: {
|
|
... row,
|
|
cells: undefined,
|
|
cellData: cellData
|
|
}
|
|
});
|
|
}
|
|
} else {
|
|
console.warn("Warning: empty response, category does not have product explorer");
|
|
}
|
|
}
|
|
},
|
|
"st:scrapeProduct": {
|
|
ttl: "15d",
|
|
taskInterval: "5s",
|
|
run: async function({ data, createItem, updateData, expireDependents }) {
|
|
if (data.productFolderUrl == null) {
|
|
throw new Error(`No known product page URL`);
|
|
}
|
|
|
|
let productPageURL = url.resolve("https://www.st.com/", data.productFolderUrl);
|
|
|
|
let response = await session.get(productPageURL);
|
|
assureResponse(response);
|
|
|
|
let $ = cheerio.load(response.body);
|
|
|
|
let datasheetLink = $("a[data-js='datasheetLink']").attr("href");
|
|
let datasheetURL = (datasheetLink != null)
|
|
? url.resolve(productPageURL, datasheetLink)
|
|
: null;
|
|
|
|
let resources = $(".st-table--resources")
|
|
.find("h3").toArray()
|
|
.map((heading) => {
|
|
let $heading = $(heading);
|
|
let sectionID = $heading.attr("id");
|
|
let sectionTitle = $heading.text().trim();
|
|
|
|
let $table = $heading.next("table");
|
|
|
|
let items = $table
|
|
.find("tbody tr").toArray()
|
|
.map((row) => {
|
|
let $row = $(row);
|
|
let $mainView = $row.find(".visible-on-desktop-only");
|
|
let $link = $mainView.find("a.st-link");
|
|
|
|
return {
|
|
url: url.resolve(productPageURL, $link.attr("href")),
|
|
documentID: $link.find("span.st-font--bold").text().trim(),
|
|
description: $link.find("span:not(.st-font--bold)").text().trim(),
|
|
version: getUntaggedText($link),
|
|
date: $row.find(".visible-on-desktop-only[data-latest-update]").text().trim()
|
|
};
|
|
});
|
|
|
|
return {
|
|
sectionID: sectionID,
|
|
sectionTitle: sectionTitle,
|
|
items: items
|
|
};
|
|
});
|
|
|
|
updateData((data) => {
|
|
return {
|
|
... data,
|
|
datasheetLink: datasheetURL,
|
|
resources: resources
|
|
};
|
|
});
|
|
|
|
expireDependents();
|
|
|
|
for (let section of resources) {
|
|
for (let resource of section.items) {
|
|
createItem({
|
|
id: `st:resource:${resource.url}`,
|
|
tags: (resource.url === datasheetURL)
|
|
? [ "st:resource", "st:datasheet" ]
|
|
: [ "st_resource" ],
|
|
data: { url: resource.url }
|
|
});
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"st:normalizeProduct": {
|
|
dependsOn: [ "st:scrapeProduct" ],
|
|
version: "8",
|
|
parallelTasks: 50,
|
|
run: async function (api) {
|
|
let { data } = api;
|
|
|
|
createDatasheet(api, {
|
|
priority: 0.8,
|
|
source: "st",
|
|
manufacturer: "STMicroelectronics",
|
|
productID: data.productId,
|
|
name: data.cellData["XJE010_VT-007"],
|
|
description: htmlEntities.decode(data.cellData["XJE014_VT-007"]),
|
|
url: data.datasheetLink
|
|
});
|
|
}
|
|
},
|
|
}
|
|
};
|
|
};
|