"use strict";
const Promise = require("bluebird");
const simpleSource = require("@promistream/simple-source");
const simpleSink = require("@promistream/simple-sink");
const buffer = require("@promistream/buffer");
const propagatePeek = require("@promistream/propagate-peek");
const propagateAbort = require("@promistream/propagate-abort");
const pipe = require("@promistream/pipe");
const isEndOfStream = require("@promistream/is-end-of-stream");
const debug = require("debug");
const objectID = require("./src/object-id");
const wireUpReadableInterface = require("./src/readable");
const wireUpWritableInterface = require("./src/writable");
// FIXME: Maybe also an abstraction for 'handle queue of requests', as this is used in multiple stream implementations
// TODO: Improve robustness of stream-end handling using https://nodejs.org/dist/latest-v14.x/docs/api/stream.html#stream_stream_finished_stream_options_callback?
// FIXME: Sequentialize all of these?
// readable
// writable
// transform
// duplex

module.exports = function convert(stream) {
	// FIXME: Proper validation and tagging
	// FIXME: Wrap v1 streams
	// NOTE: Standard I/O streams are special-cased here because they may be Duplex streams, even though the other half is never actually used. We're only interested in the interface that *is* being used.
	if (stream === process.stdin) {
		return fromReadable(stream);
	} else if (stream === process.stdout || stream === process.stderr) {
		return fromWritable(stream);
	} else if (stream.writable != null) {
		if (stream.readable != null) {
			if (stream._transform != null) {
				// transform
				return fromTransform(stream);
			} else {
				throw new Error(`Duplex streams cannot be converted with the auto-detection API. Instead, use 'fromReadable' and/or 'fromWritable' manually, depending on which parts of the Duplex stream you are interested in.`);
			}
		} else {
			return fromWritable(stream);
		}
	} else if (stream.readable != null) {
		return fromReadable(stream);
	} else {
		throw new Error(`Not a Node stream`);
	}
};
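
// Illustrative usage sketch: how `convert` is intended to slot into a Promistream pipeline.
// The package name in the require call is assumed from the debug namespace used further down,
// and driving the pipeline with a single `.read()` call follows the usual Promistream sink
// convention - treat both as assumptions rather than guarantees made by this file.
//
//     const pipe = require("@promistream/pipe");
//     const convert = require("@promistream/from-node-stream"); // assumed package name
//
//     pipe([
//         convert(process.stdin),   // special-cased: treated as a readable
//         convert(process.stdout)   // special-cased: treated as a writable
//     ]).read().then(() => {
//         console.log("stdin has been copied to stdout");
//     });
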
function fromReadable(stream) {
	let readable = wireUpReadableInterface(stream);

	return simpleSource({
		onRequest: () => {
			return readable.request();
		},
		onAbort: () => {
			return readable.destroy();
		}
	});
}

let debugWritable = debug("promistream:from-node-stream:writable");

function fromWritable(stream) {
	let upstreamHasEnded = false;
	let mostRecentSource = { abort: function() {} }; // FIXME: Replace with a proper spec-compliant dummy stream

	let convertedStream = simpleSink({
		onResult: (result) => {
			debugWritable(`[#${objectID(stream)}] Received value`);
			return writable.write(result);
		},
		onEnd: () => {
			debugWritable(`[#${objectID(stream)}] Upstream reported end-of-stream`);
			upstreamHasEnded = true;
			return writable.end();
		},
		onAbort: (_reason) => {
			debugWritable(`[#${objectID(stream)}] Pipeline was aborted`);
			return writable.destroy();
		},
		onSourceChanged: (source) => {
			debugWritable(`[#${objectID(stream)}] A source change occurred`);
			mostRecentSource = source;
		}
	});

	// NOTE: The use of `var` is intentional, to make hoisting possible here; otherwise we'd have a broken cyclical reference
	var writable = wireUpWritableInterface(stream, {
		onEnd: () => {
			debugWritable(`[#${objectID(stream)}] Underlying stream has reported a close event (upstreamHasEnded = ${upstreamHasEnded})`);

			if (!upstreamHasEnded) {
				debugWritable(`[#${objectID(stream)}] Issuing happy abort on converted stream`);
				convertedStream.abort(true, mostRecentSource);
			}
		},
		onError: (error) => {
			// Make sure we notify the pipeline, if any, by passing in the most recent source stream that we've seen.
			debugWritable(`[#${objectID(stream)}] Issuing error abort on converted stream due to: ${error.message}`);
			convertedStream.abort(error, mostRecentSource);
		}
	});

	return convertedStream;
}

let debugTransform = debug("promistream:from-node-stream:transform");

function fromTransform(stream) {
	let endHandled = false;
	let lastWrite = null;

	// FIXME: We need to specifically watch for the `error` and `end` events on the readable interface, to know when the transform stream has fully completed processing
	//        Respond to the EndOfStream produced by the push-buffer in this case
	// request, destroy
	let nodeReadable = wireUpReadableInterface(stream, {
		onEnd: () => {
			debugTransform(`[#${objectID(stream)}] Received end/close event from underlying stream`);
		},
		onError: () => {
			debugTransform(`[#${objectID(stream)}] Received error event from underlying stream`);
		}
	});

	// write, end, destroy
	let nodeWritable = wireUpWritableInterface(stream);
	/*
		So, let's talk about transform streams. They're a complicated beast to deal with - they separately implement a readable and writable interface (like any Duplex stream), but there is no *reliable* way to associate a given output value (on the readable interface) with a given input value (on the writable interface). However, we still need to respect the backpressure signals, which, in Node streams, are push-based rather than pull-based.

		Most of the complexity below serves this specific purpose - converting push-based backpressure mechanics into pull-based backpressure mechanics. We don't deal with the underlying streams directly; instead, we convert the readable and writable interfaces separately into simpler and more consistent abstractions (the same ones used for fromReadable and fromWritable), and use those in our implementation below. This prevents *most* Node-streams-specific weirdness from leaking into our backpressure conversion code.

		Our logic goes something like this:

		- Always try to read from the 'local' buffer first. This is handled by composing with @promistream/buffer automatically.
		- If there is nothing in the local buffer, then we talk to the wrapped Node stream. But first we try to read from the wrapper's push-buffer; this is a generic abstraction for converting push-based APIs to pull-based APIs (it gets wired up by wireUpReadableInterface above; a minimal illustrative sketch of such a push-buffer follows this comment block). That buffer is where values end up if the underlying Node stream disregards backpressure and keeps spamming values - supporting this is unfortunately necessary for backwards-compatibility reasons, as backpressure was a later addition to Node streams. Note that if the underlying stream has ended, the push-buffer will just conjure up an EndOfStream marker (as per the Promistream spec), and the read operation completes here.
		- If the push-buffer is *also* empty, see if there is still a 'write in progress'. This is a concept that only exists in our abstractions - the corresponding Nodestream concept is "the last write returned `false`, and we're waiting for a 'drain' event". If this is the case, we don't do anything else except wait for a new value to appear; we register a notification request with the push-buffer, which returns a Promise that will resolve once one or more new values are produced by the Nodestream - whether as a result of our previous write, or otherwise. Either way, that's our signal to continue, as there's now stuff to consume and return.
		- If there is *not* a write in progress, then by this point it's extremely unlikely that new values will ever be produced by the underlying Nodestream - all the buffers are empty, and backpressure has been relieved, which means that it is most likely expecting us to write something to it. We therefore do an 'upstream read' (from the Promistream pipeline) and pass the result into the Nodestream, registering the write as the lastWrite in case it ends up with backpressure (which the next write would need to wait on). We then try to immediately empty the push-buffer, in case the Nodestream has immediately/synchronously produced values from our input; if not, we just return nothing, and wait for the next read cycle to happen.

		This approach should yield the following technical properties:

		- Backpressure of the underlying Nodestream, where it is implemented, is respected.
		- Nodestreams that do *not* implement backpressure will get new values written to them every time our push-buffer (output buffer) runs out; so we still don't write more often than we *need* to (preferring to satisfy requests locally where possible), but we also don't block on waiting for new values to appear, which wouldn't work if there's no 1:1 correspondence between writes and reads in the Nodestream.
		- We do not write any more to the underlying Nodestream than we *have* to for reliable behaviour; this is important to keep memory usage down. Likewise, we don't request values from upstream (which may incur a resource cost) unless we actually expect the Nodestream to be interested in them.
		- Nodestreams have very poor operational guarantees to begin with, but any Nodestream that behaves correctly in typical 'native' streams code *should* also behave correctly within our wrapper code.
	*/
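	// To make the push-to-pull conversion described above more concrete, a minimal push-buffer
	// might look roughly like the sketch below. This is only an illustration of the concept; the
	// real implementation lives in ./src/readable (wireUpReadableInterface) and differs in its
	// details, including EndOfStream handling and destroy/error propagation. The method names
	// match the ones used further down (consumeImmediateBuffer, awaitBuffer); the body is assumed.
	//
	//     function createPushBuffer() {
	//         let values = [];   // values pushed by the Node stream, not yet consumed
	//         let waiters = [];  // resolver functions for consumers awaiting new values
	//
	//         return {
	//             push: (value) => {  // push side: called from the Node stream's data events
	//                 values.push(value);
	//                 waiters.forEach((resolve) => resolve());
	//                 waiters = [];
	//             },
	//             consumeImmediateBuffer: () => {  // pull side: take everything buffered so far
	//                 let buffered = values;
	//                 values = [];
	//                 return buffered;
	//             },
	//             awaitBuffer: () => {  // pull side: wait until at least one new value arrives
	//                 return new Promise((resolve) => waiters.push(resolve));
	//             }
	//         };
	//     }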
	let convertedStream = {
		_promistreamVersion: 0,
		description: `converted Node.js transform stream`,
		abort: propagateAbort,
		peek: propagatePeek,
		read: function produceValue_nodeTransformStream(promistreamSource) {
			let pushBuffer = nodeReadable.consumeImmediateBuffer();

			if (pushBuffer.length > 0) {
				return pushBuffer;
			} else if (endHandled) {
				// NOTE: This logic exists at the start, not in the upstream EndOfStream handling code, because any number of buffer reads may be required before the wrapped Node stream can be closed
				// NOTE: The underlying push-buffer will automatically produce EndOfStream markers once the buffer has run out and the underlying stream has closed, so long as we're using the wireUpReadableInterface function
				// TODO: Refactor this design (and/or push-buffer itself) to use request-matcher instead?
				return Promise.try(() => {
					return nodeReadable.request();
				}).then((result) => {
					return [ result ];
				});
			} else {
				return Promise.try(() => {
					if (lastWrite != null && !lastWrite.isFulfilled()) {
						debugTransform(`[#${objectID(stream)}] Write already in progress; waiting for new readable values to become available...`);
						return nodeReadable.awaitBuffer();
					} else {
						return Promise.try(() => {
							debugTransform(`[#${objectID(stream)}] Doing upstream read...`);
							return promistreamSource.read();
						}).then((value) => {
							debugTransform(`[#${objectID(stream)}] Writing upstream value to nodestream's writable interface`);
							lastWrite = Promise.try(() => nodeWritable.write(value));
							// NOTE: We cannot just wait for the write here; it'll block if the internal buffer of the transform stream is full, and our read would block in turn, leading to the buffer never emptying. This would deadlock. Instead, we store the promise representing our write, so that the *next* write cycle can inspect it, which are the semantics we're actually after.
							// HACK: There is *technically* a race condition here; it's possible for another upstream read to start before the first read reaches the "writing to writable interface" point, in which case the second write will 'bypass' the lastWrite check and override the value of lastWrite. However, since all write operations listen to the same `drain` event on the underlying stream anyway, this shouldn't cause any change in behaviour. If the underlying write implementation ever changes, this approach needs to be reevaluated.
							// Note that this sort-of violates the Node streams API - which *allows* further writes while waiting for a drain event, it's just strongly recommended against. But for now we're assuming that if someone decides to use a parallelization stream, they are okay with the additional memory usage.
						});
					}
				}).then(() => {
					// This will quite possibly return an empty buffer, but that is fine; the `buffer` stream downstream from us will just keep reading (and therefore queueing up new items to be transformed) until it gets some results.
					debugTransform(`[#${objectID(stream)}] Consuming immediate buffer from nodestream's readable interface`);
					return nodeReadable.consumeImmediateBuffer();
				}).catch(isEndOfStream, () => {
					debugTransform(`[#${objectID(stream)}] End of upstream reached`);
					endHandled = true;

					debugTransform(`[#${objectID(stream)}] Closing via nodestream's writable interface`);
					nodeWritable.end();

					// Return nothing, let the next read call (and all of those after that) deal with either underlying stream completion or buffered results
					return [];
				});
			}
		}
	};

	return pipe([
		convertedStream,
		buffer()
	]);
}

module.exports.fromReadable = fromReadable;
module.exports.fromWritable = fromWritable;
module.exports.fromTransform = fromTransform;
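
// End-to-end usage sketch combining the three converters. `zlib.createGzip()` is simply a
// well-known example of a Node transform stream; the package name in the require call and the
// `.read()` call that drives the pipeline follow the usual Promistream conventions and are
// assumptions, not something defined in this file.
//
//     const fs = require("fs");
//     const zlib = require("zlib");
//     const pipe = require("@promistream/pipe");
//     const { fromReadable, fromTransform, fromWritable } = require("@promistream/from-node-stream"); // assumed package name
//
//     pipe([
//         fromReadable(fs.createReadStream("input.txt")),
//         fromTransform(zlib.createGzip()),
//         fromWritable(fs.createWriteStream("input.txt.gz"))
//     ]).read().then(() => {
//         console.log("input.txt has been gzipped to input.txt.gz");
//     });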