// nix-in-node/src/astformer/index.js

/* eslint-disable no-loop-func */
"use strict";

// Design note: We return stateLogs instead of passing in an object of registered handlers to call, because a node can become obsolete in mid-processing, and in those cases all of its state sets should be ignored. By far the easiest way to implement this, is to just keep a stateLog in the node handling context (since that entire context gets thrown away when processing gets aborted due to a subtree change), and let the parent deal with actually applying any still-relevant setStates to the correct handler functions.
// TODO: Figure out a way to track 'loss factor' per optimizer, ie. how many (partial or complete) node evaluations have been discarded due to the actions of that optimizer, including subtrees. This can give insight into which optimizers cause unreasonably much wasted work.

const util = require("util");
const splitFilter = require("split-filter");
const mapObj = require("fix-esm").require("map-obj").default;
const defaultValue = require("default-value");
const isPlainObj = require("fix-esm").require("is-plain-obj").default;
const findLast = require("find-last");

const NoChange = require("./actions/no-change");
const RemoveNode = require("./actions/remove-node");
const ConsumeNode = require("./actions/consume-node");
const typeOf = require("./util/type-of");
const concat = require("./util/concat");
const merge = require("./util/merge");
const measureTime = require("./util/measure-time");
const unreachable = require("@joepie91/unreachable")("jsnix");

const createHandlerTracker = require("./handler-tracker");
const createTimings = require("./timings-tracker");
const combineOptimizers = require("./combine-optimizers");
const createDebuggers = require("./create-debuggers");
const assureArray = require("assure-array");

// Key under which a node's context overrides are stored when they apply to *every* child of that node, rather than to one specifically named child property. It is a Symbol so that it cannot clash with a real property name.
const AnyChild = Symbol("AnyChild");

// FIXME: Implement a scope tracker of some sort, to decouple the code here a bit more
// TODO: Determine if we can improve performance by avoiding a lot of array allocations for the path tracking; by eg. nesting objects instead and unpacking it into an array on-demand
// FIXME: Verify that the various iterations=0 arguments are actually correct, and don't lose iteration count metadata

// Maximum number of times in a row that a single node may be replaced and re-evaluated before optimization is aborted with an error. This guards against optimizers that never stabilize on NoChange. It is never reassigned, hence `const`.
const EVALUATION_LIMIT = 10;

/**
 * Wraps a function in a marker object, which a visitor can return to ask for `func` to be run at a later point in the handling of the node.
 *
 * @param {Function} func - The function to run later.
 * @returns {{ __type: "defer", func: Function }} The marker object.
 */
function defer(func) {
	const marker = { __type: "defer" };
	marker.func = func;

	return marker;
}

/**
 * Runs `handleASTNode` on every child node of `node`, and reports which of the node's properties ended up with a different value as a result.
 *
 * Every plain object found in a property value (directly, or nested in arrays) is treated as a child node. Anything else is left untouched.
 *
 * @param {object} node - The node whose properties should be walked.
 * @param {Function} handleASTNode - Called as `handleASTNode(child, 0, childPath, undefined, childContext)`; must return `{ node, stateLog }`.
 * @param {Array<{ type: *, key: * }>} path - Path segments leading up to `node`.
 * @param {object} originalContext - The context that `node` itself was evaluated with.
 * @param {object} contextOverrides - Context overrides for the children, keyed by property name (or AnyChild).
 * @returns {{ changedProperties: object, stateLog: Array }} The properties whose value changed (with their new value), and the combined state logs of all children.
 */
function handleNodeChildren(node, handleASTNode, path, originalContext, contextOverrides) {
	let changedProperties = {};
	let stateLogs = [];

	// NOTE: This is a same-value comparison rather than `!==`. NaN is not equal to itself under `!==`, so a NaN literal anywhere in the tree would otherwise be reported as a change on every single pass, and the parent would be re-evaluated until the evaluation limit is exceeded.
	function hasChanged(before, after) {
		return !Object.is(before, after);
	}

	function tryTransformItem(item, itemPath, context) {
		if (item == null) {
			return item;
		} else if (isPlainObj(item)) {
			// Every plain object is treated as an AST node; there is currently no explicit node marker that is checked for.
			// FIXME: Is it correct to not specify an initialStateLog here?
			let result = handleASTNode(item, 0, itemPath, undefined, context);

			if (result.stateLog.length > 0) {
				stateLogs.push(result.stateLog);
			}

			return result.node;
		} else if (Array.isArray(item)) {
			let valuesHaveChanged = false;

			let transformedArray = item.map((value, i) => {
				let pathSegment = { type: "$array", key: i };
				let transformedValue = tryTransformItem(value, itemPath.concat([ pathSegment ]), context);

				if (hasChanged(value, transformedValue)) {
					valuesHaveChanged = true;
				}

				return transformedValue;
			});

			// Hand back the original array when nothing in it changed, so that the caller's identity comparison does not see a change.
			return (valuesHaveChanged)
				? transformedArray
				: item;
		} else {
			// Probably some kind of literal value; we don't touch these.
			return item;
		}
	}

	// FIXME: Delete nulls?

	for (let [ property, value ] of Object.entries(node)) {
		let childPath = path.concat([{ type: node.type, key: property }]);

		// The child's context is the node's own context, plus whatever overrides were set for this property (or for all children).
		let newContext = mergeContexts(originalContext, contextOverrides, property);
		let transformedValue = tryTransformItem(value, childPath, newContext);

		if (hasChanged(value, transformedValue)) {
			changedProperties[property] = transformedValue;
		}
	}

	return {
		changedProperties: changedProperties,
		stateLog: concat(stateLogs)
	};
}

/**
 * Produces the context for one child property, by layering the applicable overrides on top of the parent's context.
 *
 * Overrides registered for the specific property take precedence over overrides registered under AnyChild, which in turn take precedence over the existing context. When neither kind of override exists, the existing context object itself is returned (not a copy).
 *
 * @param {object} oldContext - The context of the parent node.
 * @param {object} overrides - Context overrides, keyed by property name or AnyChild.
 * @param {string} property - Name of the child property to build the context for.
 * @returns {object} The context to evaluate the child with.
 */
function mergeContexts(oldContext, overrides, property) {
	const specificOverrides = overrides[property];
	const sharedOverrides = overrides[AnyChild];

	if (specificOverrides != null || sharedOverrides != null) {
		return {
			... oldContext,
			... (sharedOverrides ?? {}),
			... (specificOverrides ?? {})
		};
	}

	// Nothing to override for this child.
	return oldContext;
}

module.exports = function optimizeTree(ast, optimizers) {
	let debuggers = createDebuggers(optimizers);
	let visitors = combineOptimizers(optimizers);
	let timings = createTimings(optimizers);

	let visitorsByType = mapObj(visitors, (key, value) => {
		return [
			key,
			concat([
				defaultValue(value, []),
				defaultValue(visitors["*"], []),
			])
		];
	});

	function handleASTNode(node, iterations = 0, path = [], initialStateLog, context = {}) {
		// console.log({ path: path.map((item) => String(item.type)).join(" -> "), context });
		// console.log({ path: path.map((item) => String(item.key)).join(" -> "), context });
		// console.log(path.map((item) => String(item.key)).join(" -> "));

		// The stateLog contains a record of every setState call that was made during the handling of this node and its children. We keep a log for this rather than calling handlers directly, because setState calls should always apply to *ancestors*, not to the current node. That is, if the current node does a setState for `foo`, and also has a handler registered for `foo`, then that handler should not be called, but the `foo` handler in the *parent* node should be.
		// FIXME: Scope stateLog entries by optimizer name? To avoid name clashes for otherwise similar functionality. Like when multiple optimizers track column names. Also do this for context values! Maybe an escape hatch to deliberately define/reference globals or keys for other optimizers.
		let stateLog = [];
		let contextOverrides = {};
		let defers = [];
		let handlers = createHandlerTracker();
		let nodeVisitors = visitorsByType[node.type];

		function handleResult({ debuggerName, result, permitDefer, initialStateLog }) {
			if (result === NoChange) {
				// no-op
			} else if (result == null) {
				// FIXME: Figure out a better way to indicate the origin of such an issue, than the current error message format?
				// FIXME: Include information on which node this failed for
				throw new Error(`[${debuggerName}] A visitor is not allowed to return null or undefined; if you intended to leave the node untouched, return a NoChange marker instead`);
			} else if (result === RemoveNode) {
				debuggers[debuggerName](`Node of type '${typeOf(node)}' removed`);
				return { node: RemoveNode, stateLog: [] };
			} else if (result === ConsumeNode) {
				debuggers[debuggerName](`Node of type '${typeOf(node)}' consumed, but its stateLog was left intact`);
				stateLog.forEach((item) => { item.isFromConsumedNode = true; }); // NOTE: Mutates!
				return { node: ConsumeNode, stateLog: stateLog };
			} else if (result.__type === "defer") {
				if (permitDefer) {
					debuggers[debuggerName](`Defer was scheduled for node of type '${typeOf(node)}'`);
					defers.push({ debuggerName, func: result.func });
				} else {
					throw new Error(`Cannot schedule a defer from within a defer handler`);
				}
			} else /*if (result.__raqbASTNode === true)*/ {
				if (result === node) {
					// Visitor returned the original node again; but in this case, it should return NoChange instead. We enforce this because after future changes to the optimizer implementation (eg. using an internally-mutable deep copy of the tree), we may no longer be able to *reliably* detect when the original node is returned; so it's best to already get people into the habit of returning a NoChange marker in those cases, by disallowing this.
					throw new Error(`Visitor returned original node, but this may not work reliably; if you intended to leave the node untouched, return a NoChange marker instead`);
				} else {
					debuggers[debuggerName](`Node of type '${typeOf(node)}' replaced by node of type '${typeOf(result)}'`);

					if (iterations >= EVALUATION_LIMIT) {
						throw new Error(`Exceeded evaluation limit in optimizer ${debuggerName}; aborting optimization. If you are a user of this software, please report this as a bug. If you are a developer writing an optimizer, make sure that your optimizer eventually stabilizes on a terminal condition (ie. NoChange)!`);
					} else {
						return handleASTNode(result, iterations + 1, path, initialStateLog, context);
					}
				}
			// } else {
			// 	throw new Error(`Visitor returned an unexpected type of return value: ${util.inspect(result)}`);
			}
		}

		function handleStateLog(newStateLog) {
			let [ relevantState, otherState ] = splitFilter(newStateLog, (entry) => handlers.has(entry.name));

			stateLog = stateLog.concat(otherState);

			for (let item of relevantState) {
				// FIXME: Log these, and which visitor they originate from
				handlers.call(item.name, item.value);
			}
		}

		function applyVisitorFunction({ visitorName, func, node, permitDefer }) {
			let { value: result, time } = measureTime(() => {
				return func(node, {
					// eslint-disable-next-line no-loop-func
					setState: (name, value) => {
						// FIXME: util.inspect is slow, and not necessary when debug mode is disabled
						debuggers[visitorName](`Setting state for '${name}' from node of type '${typeOf(node)}': ${util.inspect(value, { colors: true })}`);
						stateLog.push({ name, value });
					},
					registerStateHandler: (name, func) => handlers.add(name, func),
					defer: (permitDefer === true) ? defer : null,
					findNearestStep: function (type) {
						return (type != null)
							? findLast(path, (item) => item.type === type)
							: path[path.length - 1];
					},
					setContext: (children, key, value) => {
						// FIXME: Turn this into an abstraction
						// FIXME: Disallow this once we are in a `defer`; using it there is a bug, as child nodes have already been processed, and so context cannot be propagated to them anymore. Should throw an error telling the user that they probably have a bug in their code.
						function setOne(child, key, value) {
							if (contextOverrides[child] == null) {
								contextOverrides[child] = {};
							}

							contextOverrides[child][key] = value;
						}

						if (children != null) {
							assureArray(children).forEach((child) => {
								setOne(child, key, value);
							});
						} else {
							setOne(AnyChild, key, value);
						}
					},
					getContext: (key) => {
						// NOTE: We *do not* consider contextOverrides here. A node cannot set context for itself, only for its children. Instead, contextOverrides gets handled when passing a new context object to a child node upon its evaluation.
						if (context[key] != null) {
							return context[key];
						} else {
							throw new Error(`No key '${key}' exists in the context here`);
						}
					}
				});
			});

			timings[visitorName] += time;

			return result;
		}

		if (nodeVisitors != null) {
			for (let visitor of nodeVisitors) {
				let handled = handleResult({
					debuggerName: visitor.name,
					result: applyVisitorFunction({
						visitorName: visitor.name,
						func: visitor.func,
						node: node,
						permitDefer: true
					}),
					permitDefer: true
				});

				if (handled != null) {
					// Handling of the current node was aborted
					return handled;
				}
			}
		}

		let childResult = handleNodeChildren(node, handleASTNode, path, context, contextOverrides);

		if (Object.keys(childResult.changedProperties).length > 0) {
			let newNode = merge(node, childResult.changedProperties);

			// We already know that the new node is a different one, but let's just lead it through the same handleResult process, for consistency. Handling of the pre-child-changes node is aborted here, and we re-evaluate with the new node.
			let reevaluatedResult = handleResult({
				debuggerName: "(subtree change)",
				result: newNode,
				permitDefer: false,
				// NOTE: If we have any leftover state from nodes that were consumed upstream, we should make sure to include this in the reevaluation, even when the subtree was replaced!
				initialStateLog: (childResult.stateLog.length > 0)
					? childResult.stateLog.filter((item) => item.isFromConsumedNode)
					: undefined
			});

			return reevaluatedResult;
		}

		if (initialStateLog != null) {
			// NOTE: We intentionally process the initialStateLog here and not earlier; that way it is consistent with how any retained stateLog entries *would* have executed on the node before it got replaced (ie. after evaluation of the children). Conceptually you can think of it as the initialStateLog being prefixed to the stateLog of the childResult.
			handleStateLog(initialStateLog);
		}

		if (childResult.stateLog.length > 0) {
			handleStateLog(childResult.stateLog);
		}

		for (let defer of defers) {
			let handled = handleResult({
				debuggerName: `${defer.debuggerName} (deferred)`,
				result: applyVisitorFunction({
					visitorName: defer.debuggerName,
					func: defer.func,
					node: node,
					permitDefer: false
				}),
				permitDefer: false
			});

			if (handled != null) {
				// Handling of the current node was aborted
				return handled;
			}
		}

		return {
			stateLog: stateLog,
			node: node
		};
	}

	let { value: rootResult, time } = measureTime(() => {
		return handleASTNode(ast);
	});

	let timeSpentInOptimizers = Object.values(timings).reduce((sum, n) => sum + n, 0);

	if (rootResult.node !== RemoveNode && rootResult.node !== ConsumeNode) {
		return {
			ast: rootResult.node,
			timings: {
				"# Total": time,
				"# Walker overhead": time - timeSpentInOptimizers,
				... timings,
			}
		};
	} else {
		unreachable("Root node was removed");
	}
};