"use strict"; const bhttp = require("bhttp"); const zlib = require("zlib"); const assert = require("assert"); const pipe = require("@promistream/pipe"); const fromNodeStream = require("@promistream/from-node-stream"); const simpleSink = require("@promistream/simple-sink"); const parseSitemap = require("@promistream/parse-sitemap"); const decodeString = require("@promistream/decode-string"); const assureResponse = require("../../shared/assure-response"); let session = bhttp.session({ headers: { "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0", "accept": "*/*" } }); // NOTE: WIP, currently running into PerimeterX issues, to be continued later module.exports = function mouserScrapeSitemap({ }) { return async function ({ data, createItem }) { // console.log((await session.get("https://www.mouser.com/")).statusCode); // console.log({data}); console.log("Making request...."); let response = await session.get(data.url, { stream: true }); // let response = await session.get("http://localhost:4567", { stream: true }); console.log("Got response"); assureResponse(response); console.log("Valid"); await pipe([ fromNodeStream(response), (data.url.endsWith(".gz")) ? fromNodeStream(zlib.createGunzip()) : null, decodeString("utf8"), parseSitemap(), simpleSink((item) => { assert(item.url); if (item.type === "sitemap") { createItem({ id: `mouser:sitemap:${item.url}`, tags: [ "mouser:sitemap" ], data: { url: item.url } }); } else { console.log(item); } }) ]).read(); }; };