You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

138 lines
4.2 KiB
JavaScript

"use strict";
const syncpipe = require("syncpipe");
const url = require("url");
const assureResponse = require("../../shared/assure-response");
const surgeon = require("../../shared/surgeon-utils");
const uniqueArray = require("../../shared/unique-array");
const extractModelNumber = require("../../shared/extract-model-number");
const createDatasheet = require("../../shared/create-datasheet");
// Focus LCDs
module.exports = function ({ session }) {
return {
seed: [{
id: "focus-lcds:home",
tags: [ "focus-lcds:home" ],
data: {}
}],
tags: {
"focus-lcds:home": [ "focus-lcds:findCategories" ],
"focus-lcds:category": [ "focus-lcds:scrapeCategory" ],
"focus-lcds:product": [ "focus-lcds:scrapeProduct", "focus-lcds:normalizeProduct" ],
},
tasks: {
"focus-lcds:findCategories": {
ttl: "60d",
run: async function({ createItem }) {
let response = await session.get("https://focuslcds.com/");
assureResponse(response);
let urls = syncpipe(null, [
_ => surgeon(`selectMany ".category-list a" | readAttr href`, response.body.toString()),
_ => uniqueArray(_),
_ => _.filter((relativeURL) => relativeURL !== ""),
_ => _.map((relativeURL) => url.resolve("https://focuslcds.com/", relativeURL))
]);
for (let url of urls) {
createItem({
id: `focus-lcds:category:${url}`,
tags: [ "focus-lcds:category" ],
data: { url: url }
});
}
}
},
"focus-lcds:scrapeCategory": {
ttl: "15d",
taskInterval: "1m",
run: async function({ data, createItem }) {
let response = await session.get(data.url);
assureResponse(response);
let body = response.body.toString();
let nextPageURL = surgeon(`selectMaybeN ".pagination-item--next a" 0 | readAttr href`, body);
if (nextPageURL != null) {
createItem({
id: `focus-lcds:category:${nextPageURL}`,
tags: [ "focus-lcds:category" ],
data: { url: nextPageURL }
});
}
let items = surgeon(`selectMany "ul.productList .listItem-title a" | readAttr href`, body);
for (let url of items) {
createItem({
id: `focus-lcds:product:${url}`,
tags: [ "focus-lcds:product" ],
data: { url: url }
});
}
}
},
"focus-lcds:scrapeProduct": {
ttl: "15d",
taskInterval: "5s",
run: async function({ data, updateData, expireDependents }) {
let response = await session.get(data.url);
assureResponse(response);
let body = response.body.toString();
let itemData = surgeon({
name: [ `selectOne "meta[property='og:title']" | readAttr content`, extractModelNumber ],
manufacturer: `selectMaybeOne "[itemprop='brand']" | text | ignoreEmptyString`,
description: [
`selectMaybeOne "meta[name='description']" | readAttr content`,
// Get rid of the keyword spam...
(description) => description.split(",")[0]
],
image: `selectMaybeOne "meta[property='og:image']" | readAttr content`,
price: `selectMaybeOne .productView-price .price--withoutTax | text`,
datasheetURL: [
`selectMaybeOne ".productView-info a[href^='/content/']" | readAttr href`,
(relativeURL) => (!/^\/content\/?$/.test(relativeURL))
? url.resolve("https://focuslcds.com/", relativeURL)
: null // Ignore when the datasheet URL is *just* /content/, as that means there is no datasheet for this product
],
technicalSpecs: [ `selectAny "#tab-description .productView-info-name"`, {
name: `text`,
value: `nextUntil ".productView-info-name" ".productView-info-value" | index 0 | text`
}]
}, body);
updateData((oldData) => ({
... oldData,
itemData: itemData
}));
expireDependents();
}
},
"focus-lcds:normalizeProduct": {
dependsOn: [ "focus-lcds:scrapeProduct" ],
parallelTasks: 50,
run: async function (api) {
let { data } = api;
createDatasheet(api, {
priority: 0.8,
source: "focus-lcds",
manufacturer: data.itemData.manufacturer ?? "Focus LCDs",
productID: null,
name: data.itemData.name,
description: data.itemData.description,
url: data.itemData.datasheetURL
});
}
},
}
};
};