You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
154 lines
3.8 KiB
JavaScript
154 lines
3.8 KiB
JavaScript
"use strict";
|
|
|
|
const bhttp = require("bhttp");
|
|
const got = require("got");
|
|
|
|
const assureResponse = require("./lib/shared/assure-response");
|
|
|
|
// Shared HTTP clients. The `state` object built here is passed to each task
// factory further down in this file.

// bhttp session created with the crawler's identifying user-agent header.
const crawlerSession = bhttp.session({
  headers: {
    "user-agent": "seekseek.org beta crawler (contact/problems: admin@cryto.net)"
  }
});

// got `afterResponse` hook: hands the response to `assureResponse`, then
// returns the same response object unchanged.
const checkResponse = (response) => {
  assureResponse(response);
  return response;
};

// For HTTP/2, until bhttp gains HTTP/2 support
const http2Client = got.extend({
  http2: true,
  headers: {
    // NOTE(review): the crawler-identifying agent below is disabled and a
    // browser user-agent is sent instead; the reason is not recorded in this
    // file -- confirm it is still intended.
    // "user-agent": "seekseek.org beta crawler (contact/problems: admin@cryto.net)"
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:87.0) Gecko/20100101 Firefox/87.0"
  },
  hooks: {
    afterResponse: [ checkResponse ]
  }
});

let state = {
  session: crawlerSession,
  gotSession: http2Client
};
|
|
|
|
module.exports = {
|
|
database: {
|
|
host: "/run/postgresql",
|
|
database: "seekseek_documentation"
|
|
},
|
|
seed: [{
|
|
id: "st:home",
|
|
tags: [ "st:home" ],
|
|
data: {}
|
|
}, {
|
|
id: "lcsc:home",
|
|
tags: [ "lcsc:home" ],
|
|
data: {}
|
|
}, {
|
|
id: "mouser:sitemap:index",
|
|
tags: [ "mouser:sitemap" ],
|
|
data: { url: "https://www.mouser.com/indexgzipwww.xml" }
|
|
}, {
|
|
id: "tme:sitemap:index",
|
|
tags: [ "tme:sitemap" ],
|
|
data: { url: "https://www.tme.eu/en/sitemap.xml" }
|
|
// TODO: Delete derived sitemap entries
|
|
}, {
|
|
id: "farnell:sitemap:index",
|
|
tags: [ "farnell:sitemap" ],
|
|
data: { url: "https://uk.farnell.com/sitemap.xml" }
|
|
// TODO: Delete derived sitemap entries
|
|
}],
|
|
tags: {
|
|
"st:home": [ "st:findCategories" ],
|
|
"st:category": [ "st:scrapeCategory" ],
|
|
"st:product": [ "st:scrapeProduct", "st:normalizeProduct" ],
|
|
"lcsc:home": [ "lcsc:findCategories" ],
|
|
"lcsc:category": [ "lcsc:scrapeCategory" ],
|
|
"lcsc:product": [ "lcsc:normalizeProduct" ],
|
|
"tme:sitemap": [ "tme:scrapeSitemap" ],
|
|
"tme:product": [ "tme:scrapeProduct", "tme:normalizeProduct" ],
|
|
"farnell:sitemap": [ "farnell:scrapeSitemap" ],
|
|
"farnell:product": [ "farnell:scrapeProduct", "farnell:normalizeProduct" ],
|
|
},
|
|
tasks: {
|
|
// ST Microelectronics
|
|
"st:findCategories": {
|
|
ttl: "15d",
|
|
run: require("./lib/st/task/find-categories")(state)
|
|
},
|
|
"st:scrapeCategory": {
|
|
ttl: "1d",
|
|
taskInterval: "60s",
|
|
version: "2",
|
|
run: require("./lib/st/task/scrape-category")(state)
|
|
},
|
|
"st:scrapeProduct": {
|
|
ttl: "15d",
|
|
taskInterval: "5s",
|
|
run: require("./lib/st/task/scrape-product")(state)
|
|
},
|
|
"st:normalizeProduct": {
|
|
dependsOn: [ "st:scrapeProduct" ],
|
|
version: "8",
|
|
parallelTasks: Infinity,
|
|
run: require("./lib/st/task/normalize-product")(state)
|
|
},
|
|
|
|
// LCSC
|
|
"lcsc:findCategories": {
|
|
ttl: "30d",
|
|
taskVersion: "1",
|
|
run: require("./lib/lcsc/task/find-categories")(state)
|
|
},
|
|
"lcsc:scrapeCategory": {
|
|
ttl: "30d",
|
|
taskInterval: "1m",
|
|
run: require("./lib/lcsc/task/scrape-category")(state)
|
|
},
|
|
"lcsc:normalizeProduct": {
|
|
version: "7",
|
|
parallelTasks: Infinity,
|
|
run: require("./lib/lcsc/task/normalize-product")(state)
|
|
},
|
|
|
|
// Mouser
|
|
"mouser:scrapeSitemap": {
|
|
taskInterval: "30s",
|
|
run: require("./lib/mouser/task/scrape-sitemap")(state)
|
|
},
|
|
|
|
// TME.eu
|
|
"tme:scrapeSitemap": {
|
|
ttl: "3d",
|
|
taskInterval: "30s",
|
|
run: require("./lib/tme/task/scrape-sitemap")(state)
|
|
},
|
|
"tme:scrapeProduct": {
|
|
ttl: "60d",
|
|
taskInterval: "500ms",
|
|
run: require("./lib/tme/task/scrape-product")(state)
|
|
},
|
|
"tme:normalizeProduct": {
|
|
dependsOn: [ "tme:scrapeProduct" ],
|
|
version: "5",
|
|
parallelTasks: Infinity,
|
|
run: require("./lib/tme/task/normalize-product")(state)
|
|
},
|
|
|
|
// Farnell
|
|
"farnell:scrapeSitemap": {
|
|
ttl: "3d",
|
|
taskInterval: "30s",
|
|
run: require("./lib/farnell/task/scrape-sitemap")(state)
|
|
},
|
|
"farnell:scrapeProduct": {
|
|
ttl: "60d",
|
|
taskInterval: "500ms",
|
|
run: require("./lib/farnell/task/scrape-product")(state)
|
|
},
|
|
"farnell:normalizeProduct": {
|
|
dependsOn: [ "farnell:scrapeProduct" ],
|
|
version: "1",
|
|
parallelTasks: Infinity,
|
|
run: require("./lib/farnell/task/normalize-product")(state)
|
|
},
|
|
}
|
|
};
|