diff --git a/lib/tme/task/normalize-product.js b/lib/tme/task/normalize-product.js index d92eceb..9412abb 100644 --- a/lib/tme/task/normalize-product.js +++ b/lib/tme/task/normalize-product.js @@ -3,7 +3,7 @@ const createDatasheet = require("../../shared/create-datasheet"); function isEnglish(document) { - return /\sen\s*$/.test(document.description); + return /\sen\s*$/i.test(document.description); /* Case-insensitive check: the description must end with a whitespace character followed by en, optionally followed by trailing whitespace. */ } module.exports = function tmeNormalizeProduct() { diff --git a/lib/tme/task/scrape-product.js b/lib/tme/task/scrape-product.js index c01aee4..da042c0 100644 --- a/lib/tme/task/scrape-product.js +++ b/lib/tme/task/scrape-product.js @@ -4,10 +4,17 @@ const assert = require("assert"); const cheerio = require("cheerio"); const syncpipe = require("syncpipe"); const url = require("url"); -const defaultValue = require("default-value"); const assureResponse = require("../../shared/assure-response"); +function firstMatch(options) { /* Returns the first entry of options that is neither null, undefined, nor the empty string; falls through and returns undefined when no entry qualifies. */ + for (let option of options) { + if (option != null && option !== "") { + return option; + } + } +} + module.exports = function tmeScrapeProduct({ session }) { return async function ({ data, createAlias, updateData, expireDependents }) { let response = await session.get(data.url); @@ -15,6 +22,7 @@ module.exports = function tmeScrapeProduct({ session }) { let $ = cheerio.load(response.body); + // FIXME: This is currently broken! 
let allMetaHeaders = syncpipe($("h2.o-semantic-only-header").toArray(), [ (_) => _.map((header) => { let $header = $(header); @@ -27,15 +35,19 @@ module.exports = function tmeScrapeProduct({ session }) { (_) => Object.fromEntries(_) ]); + let descriptionElement = $(".c-pip__description > h2").eq(0); + let itemData = { - productID: $(".pip-product-symbol").eq(0).text().trim(), - manufacturer: $("[data-gtm-event-action='producer_link']").eq(0).text().trim(), - model: defaultValue( - allMetaHeaders["Manufacturer part number:"], - allMetaHeaders["TME Symbol:"] - ), - description: $(".c-pip__sub-name").eq(0).text().trim(), - documents: $("a.c-pip__downloads-file-link").toArray() + productID: $("h2.c-pip__symbol--tme .c-pip__symbol-value").eq(0).text().trim(), + manufacturer: $("a.pip__product-header-title").eq(0).text().trim(), + model: firstMatch([ + $("h2.c-pip__symbol--producer .c-pip__symbol-value").eq(0).text().trim(), // Manufacturer part number + $("h2.c-pip__symbol--tme .c-pip__symbol-value").eq(0).text().trim(), // TME Symbol + ]), + description: (descriptionElement.children().length === 0) // This skips meta fields if there is no description element + ? descriptionElement.text().trim() + : null, + documents: $("div.c-pip__document > a").toArray() .map((link) => { let relativeLink = $(link).attr("href");