@ -14,6 +14,7 @@ const createNDJSONParseStream = require("./ndjson-parse-stream");
module . exports = function ( { scrapingHost } ) {
module . exports = function ( { scrapingHost } ) {
return function createUpdateStream ( { since , prefix } = { } ) {
return function createUpdateStream ( { since , prefix } = { } ) {
let lastTimestamp = since ? ? new Date ( 0 ) ;
let lastTimestamp = since ? ? new Date ( 0 ) ;
let resultCounter = 0 ;
return pipe ( [
return pipe ( [
simpleSource ( ( ) => {
simpleSource ( ( ) => {
@ -27,6 +28,7 @@ module.exports = function({ scrapingHost }) {
} ) ;
} ) ;
} ) . then ( ( response ) => {
} ) . then ( ( response ) => {
if ( response . statusCode === 200 ) {
if ( response . statusCode === 200 ) {
console . log ( "Received success response from sync server" ) ;
return fromNodeStream . fromReadable ( response ) ;
return fromNodeStream . fromReadable ( response ) ;
} else {
} else {
throw new Error ( ` Got unexpected status code ${ response . statusCode } ` ) ;
throw new Error ( ` Got unexpected status code ${ response . statusCode } ` ) ;
@ -48,6 +50,12 @@ module.exports = function({ scrapingHost }) {
createCombineSequentialStream ( ) ,
createCombineSequentialStream ( ) ,
createNDJSONParseStream ( ) ,
createNDJSONParseStream ( ) ,
createSpyStream ( ( item ) => {
createSpyStream ( ( item ) => {
resultCounter ++ ;
if ( resultCounter % 1000 === 0 ) {
console . log ( ` Received ${ resultCounter } items since last restart ` ) ;
}
if ( item . updatedAt != null ) {
if ( item . updatedAt != null ) {
// TODO: Can this be made significantly more performant by string-sorting the timestamps in ISO format directly, instead of going through a parsing cycle?
// TODO: Can this be made significantly more performant by string-sorting the timestamps in ISO format directly, instead of going through a parsing cycle?
let itemDate = new Date ( item . updatedAt ) ;
let itemDate = new Date ( item . updatedAt ) ;