You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.5 KiB
JavaScript
54 lines
1.5 KiB
JavaScript
"use strict";
|
|
|
|
const bhttp = require("bhttp");
|
|
const got = require("got");
|
|
const mergeSources = require("./lib/merge-sources");
|
|
|
|
const assureResponse = require("./lib/shared/assure-response");
|
|
|
|
/**
 * HTTP clients shared by the scraper sources. This object is passed to every
 * source module factory when the exported configuration is assembled.
 *
 * - `session`: a bhttp session whose user-agent identifies the crawler and
 *   differs between production and any other NODE_ENV.
 * - `gotSession`: a got instance with HTTP/2 enabled.
 */
const state = {
	session: bhttp.session({
		headers: {
			"user-agent": (process.env.NODE_ENV === "production")
				? "seekseek.org crawler (seekseek.org/contact)"
				: "seekseek.org crawler, development mode (seekseek.org/contact)"
		}
	}),
	// For HTTP/2, until bhttp gains HTTP/2 support
	gotSession: got.extend({
		http2: true,
		headers: {
			// NOTE(review): this client sends a browser user-agent rather than the
			// crawler identification below, which was left commented out — the
			// reason is not visible in this file; confirm before changing it.
			// "user-agent": "seekseek.org beta crawler (contact/problems: admin@cryto.net)"
			"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:87.0) Gecko/20100101 Firefox/87.0"
		},
		hooks: {
			afterResponse: [(response) => {
				// Every response goes through the shared check before being handed
				// back to the caller (presumably it throws on an unacceptable
				// response — verify in ./lib/shared/assure-response).
				assureResponse(response);
				return response;
			}]
		}
	})
};
|
|
|
|
/**
 * Base configuration that the individual sources are merged into.
 *
 * `seed`, `tags` and `tasks` start out empty here; they are presumably filled
 * in from the source modules by `mergeSources` (confirm in ./lib/merge-sources).
 */
const baseSchema = {
	backend: "postgresql",
	database: {
		// A filesystem path rather than a hostname — presumably the directory of
		// the local PostgreSQL Unix socket; verify against the database client.
		host: "/run/postgresql",
		database: "seekseek_documentation",
		pool: {
			max: 75
		}
	},
	seed: [],
	tags: {},
	tasks: {}
};
|
|
|
|
// NOTE: This is *not* currently a fully modular system! Identifiers (tags, task IDs, etc.) are still global to the srap instance as a whole, even though the code exists in different modules. Prefixing identifiers with the scraper they originate from is still necessary!
|
|
module.exports = mergeSources(baseSchema, [
|
|
require("./lib/sources/datasheets/lcsc")(state),
|
|
require("./lib/sources/datasheets/tme")(state),
|
|
require("./lib/sources/datasheets/st")(state),
|
|
require("./lib/sources/datasheets/focus-lcds")(state),
|
|
]);
|