"use strict"; const url = require("url"); const assureResponse = require("../../shared/assure-response"); const surgeon = require("../../shared/surgeon-utils"); const extractModelNumber = require("../../shared/extract-model-number"); module.exports = function scrapeProduct({ session }) { return async function({ data, updateData, expireDependents }) { let response = await session.get(data.url); assureResponse(response); let body = response.body.toString(); let itemData = surgeon({ name: [ `selectOne "meta[property='og:title']" | readAttr content`, extractModelNumber ], manufacturer: `selectMaybeOne "[itemprop='brand']" | text | ignoreEmptyString`, description: [ `selectMaybeOne "meta[name='description']" | readAttr content`, // Get rid of the keyword spam... (description) => description.split(",")[0] ], image: `selectMaybeOne "meta[property='og:image']" | readAttr content`, price: `selectMaybeOne .productView-price .price--withoutTax | text`, datasheetURL: [ `selectMaybeOne ".productView-info a[href^='/content/']" | readAttr href`, (relativeURL) => (!/^\/content\/?$/.test(relativeURL)) ? url.resolve("https://focuslcds.com/", relativeURL) : null // Ignore when the datasheet URL is *just* /content/, as that means there is no datasheet for this product ], technicalSpecs: [ `selectAny "#tab-description .productView-info-name"`, { name: `text`, value: `nextUntil ".productView-info-name" ".productView-info-value" | index 0 | text` }] }, body); updateData((oldData) => ({ ... oldData, itemData: itemData })); expireDependents(); }; };