Fix TME scraper for site update

master
Sven Slootweg 1 year ago
parent 8c48f70677
commit 794526d001

@ -3,7 +3,7 @@
const createDatasheet = require("../../shared/create-datasheet"); const createDatasheet = require("../../shared/create-datasheet");
function isEnglish(document) { function isEnglish(document) {
return /\sen\s*$/.test(document.description); return /\sen\s*$/i.test(document.description);
} }
module.exports = function tmeNormalizeProduct() { module.exports = function tmeNormalizeProduct() {

@ -4,10 +4,17 @@ const assert = require("assert");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const syncpipe = require("syncpipe"); const syncpipe = require("syncpipe");
const url = require("url"); const url = require("url");
const defaultValue = require("default-value");
const assureResponse = require("../../shared/assure-response"); const assureResponse = require("../../shared/assure-response");
/**
 * Picks the first usable value out of a list of candidates.
 *
 * A candidate is skipped when it is `null`, `undefined`, or the empty string;
 * any other value (including `0` and `false`) is accepted.
 *
 * @param {Iterable<*>} options - Candidates, in order of preference.
 * @returns {*} The first accepted candidate, or `undefined` when there is none.
 */
function firstMatch(options) {
	for (let candidate of options) {
		// Loose `== null` deliberately covers both null and undefined.
		let isMissing = (candidate == null);
		let isEmptyString = (candidate === "");

		if (isMissing || isEmptyString) {
			continue;
		}

		return candidate;
	}

	return undefined;
}
module.exports = function tmeScrapeProduct({ session }) { module.exports = function tmeScrapeProduct({ session }) {
return async function ({ data, createAlias, updateData, expireDependents }) { return async function ({ data, createAlias, updateData, expireDependents }) {
let response = await session.get(data.url); let response = await session.get(data.url);
@ -15,6 +22,7 @@ module.exports = function tmeScrapeProduct({ session }) {
let $ = cheerio.load(response.body); let $ = cheerio.load(response.body);
// FIXME: This is currently broken!
let allMetaHeaders = syncpipe($("h2.o-semantic-only-header").toArray(), [ let allMetaHeaders = syncpipe($("h2.o-semantic-only-header").toArray(), [
(_) => _.map((header) => { (_) => _.map((header) => {
let $header = $(header); let $header = $(header);
@ -27,15 +35,19 @@ module.exports = function tmeScrapeProduct({ session }) {
(_) => Object.fromEntries(_) (_) => Object.fromEntries(_)
]); ]);
let descriptionElement = $(".c-pip__description > h2").eq(0);
let itemData = { let itemData = {
productID: $(".pip-product-symbol").eq(0).text().trim(), productID: $("h2.c-pip__symbol--tme .c-pip__symbol-value").eq(0).text().trim(),
manufacturer: $("[data-gtm-event-action='producer_link']").eq(0).text().trim(), manufacturer: $("a.pip__product-header-title").eq(0).text().trim(),
model: defaultValue( model: firstMatch([
allMetaHeaders["Manufacturer part number:"], $("h2.c-pip__symbol--producer .c-pip__symbol-value").eq(0).text().trim(), // Manufacturer part number
allMetaHeaders["TME Symbol:"] $("h2.c-pip__symbol--tme .c-pip__symbol-value").eq(0).text().trim(), // TME Symbol
), ]),
description: $(".c-pip__sub-name").eq(0).text().trim(), description: (descriptionElement.children().length === 0) // This skips meta fields if there is no description element
documents: $("a.c-pip__downloads-file-link").toArray() ? descriptionElement.text().trim()
: null,
documents: $("div.c-pip__document > a").toArray()
.map((link) => { .map((link) => {
let relativeLink = $(link).attr("href"); let relativeLink = $(link).attr("href");

Loading…
Cancel
Save