"use strict"; const Promise = require("bluebird"); const promiseTaskQueue = require("promise-task-queue"); const assureArray = require("assure-array"); const createScrapeLogger = require("./scrape-logger"); const normalizeTags = require("./normalize-items"); module.exports = function simpleRunner(createScraper) { let scraper = createScraper(); let logger = createScrapeLogger({ name: scraper.name }); let queue = promiseTaskQueue(); queue.define("scrape", (task) => { let {url} = task; let updatedUrl = scraper.updateUrl(url); let matchingHandler = scraper.handlers.find(([regex, _handler]) => regex.exec(updatedUrl)); if (matchingHandler == null) { logger.error(`Scraper does not have a handler for URL in queue: ${updatedUrl}`); } else { return Promise.try(() => { let [_regex, handler] = matchingHandler; return handler(updatedUrl); }).then(({urls, items}) => { if (urls != null) { urls.forEach((url) => { logger.debug(`Queueing new URL: ${url}`); queue.push("scrape", {url: url}); }); } if (items != null) { items.forEach((item) => { let normalizedItem = normalizeTags(item); logger.done(normalizedItem); logger.debug(`Encountered item`, normalizedItem); }); } return null; }); } }, { interval: 0.5 }); return Promise.try(() => { return scraper.initialize(); }).then((rootUrls) => { assureArray(rootUrls).forEach((url) => { queue.push("scrape", {url: url}); }); }); };