"use strict";

const bhttp = require("bhttp");
const got = require("got");

const mergeSources = require("./lib/merge-sources");
const assureResponse = require("./lib/shared/assure-response");

// The bhttp session identifies itself as the seekseek.org crawler; outside of
// production the user agent additionally says so.
const isProduction = (process.env.NODE_ENV === "production");
const crawlerUserAgent = isProduction
  ? "seekseek.org crawler (seekseek.org/contact)"
  : "seekseek.org crawler, development mode (seekseek.org/contact)";

// The got-based session sends a browser-style user agent rather than a crawler one.
// Previously used value, kept for reference:
//   "seekseek.org beta crawler (contact/problems: admin@cryto.net)"
const http2UserAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:87.0) Gecko/20100101 Firefox/87.0";

/**
 * got `afterResponse` hook: runs the shared response check on every response
 * and then hands the same response object back to got.
 */
function checkResponse(response) {
  assureResponse(response);
  return response;
}

// HTTP clients shared by every source module below.
const state = {
  session: bhttp.session({
    headers: { "user-agent": crawlerUserAgent }
  }),
  // For HTTP/2, until bhttp gains HTTP/2 support
  gotSession: got.extend({
    http2: true,
    headers: { "user-agent": http2UserAgent },
    hooks: { afterResponse: [checkResponse] }
  })
};

// Base configuration that the individual sources are merged into.
const baseSchema = {
  backend: "postgresql",
  database: {
    host: "/run/postgresql",
    database: "seekseek_documentation",
    pool: { max: 75 }
  },
  seed: [],
  tags: {},
  tasks: {}
};

// NOTE: This is *not* a fully modular system yet! Identifiers (tags, task IDs,
// etc.) are still global to the srap instance as a whole, even though the code
// lives in separate modules. Prefixing identifiers with the name of the scraper
// they originate from is therefore still necessary.
//
// Each source module exports a factory that is called with the shared state;
// every module is loaded and instantiated in turn, in the order listed.
const lcsc = require("./lib/sources/datasheets/lcsc")(state);
const tme = require("./lib/sources/datasheets/tme")(state);
const st = require("./lib/sources/datasheets/st")(state);
const focusLcds = require("./lib/sources/datasheets/focus-lcds")(state);

module.exports = mergeSources(baseSchema, [lcsc, tme, st, focusLcds]);