"use strict" ;
const bhttp = require ( "bhttp" ) ;
const got = require ( "got" ) ;
const mergeSources = require ( "./lib/merge-sources" ) ;
const assureResponse = require ( "./lib/shared/assure-response" ) ;
// Shared per-crawler state: two HTTP clients that every source module reuses.
// Identify the crawler honestly, and distinguish dev traffic from production.
const crawlerUserAgent = (process.env.NODE_ENV === "production")
	? "seekseek.org crawler (seekseek.org/contact)"
	: "seekseek.org crawler, development mode (seekseek.org/contact)";

let state = {
	session: bhttp.session({
		headers: {
			"user-agent": crawlerUserAgent
		}
	}),
	// Separate got-based client for HTTP/2, until bhttp gains HTTP/2 support.
	// NOTE(review): this one deliberately(?) masquerades as Firefox instead of
	// using the honest crawler UA above — confirm this is intentional.
	// "user-agent": "seekseek.org beta crawler (contact/problems: admin@cryto.net)"
	gotSession: got.extend({
		http2: true,
		headers: {
			"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:87.0) Gecko/20100101 Firefox/87.0"
		},
		hooks: {
			// Normalize/validate every response before it reaches calling code.
			afterResponse: [ (response) => {
				assureResponse(response);
				return response;
			} ]
		}
	})
};
// Base srap configuration that every scraper source gets merged into:
// PostgreSQL storage (via local UNIX socket) plus empty seed/tag/task
// registries for the source modules to populate.
let baseSchema = {
	backend: "postgresql",
	database: {
		host: "/run/postgresql", // local UNIX socket directory, not TCP
		database: "seekseek_documentation",
		pool: {
			max: 75 // cap on concurrent database connections
		}
	},
	seed: [],
	tags: {},
	tasks: {}
};
// NOTE: This is *not* currently a fully modular system! Identifiers (tags, task IDs, etc.) are still global to the srap instance as a whole, even though the code exists in different modules. Prefixing identifiers with the scraper they originate from, is still necessary!
module . exports = mergeSources ( baseSchema , [
require ( "./lib/sources/datasheets/lcsc" ) ( state ) ,
require ( "./lib/sources/datasheets/tme" ) ( state ) ,
require ( "./lib/sources/datasheets/st" ) ( state ) ,
require ( "./lib/sources/datasheets/focus-lcds" ) ( state ) ,
] ) ;