Browse Source

WIP, move out astformer

master
Sven Slootweg 4 months ago
parent
commit
42d44661b3
  1. 2
      samples/attrsets.nix
  2. 6
      src/astformer/actions/consume-node.js
  3. 3
      src/astformer/actions/no-change.js
  4. 3
      src/astformer/actions/remove-node.js
  5. 20
      src/astformer/combine-optimizers.js
  6. 16
      src/astformer/create-debuggers.js
  7. 27
      src/astformer/handler-tracker.js
  8. 361
      src/astformer/index.js
  9. 12
      src/astformer/timings-tracker.js
  10. 11
      src/astformer/util/concat.js
  11. 21
      src/astformer/util/measure-time.js
  12. 5
      src/astformer/util/merge.js
  13. 12
      src/astformer/util/type-of.js
  14. 65
      src/evaluate.js
  15. 29
      src/transformers/attribute-sets.js
  16. 2
      src/transformers/desugar-attrsets.js
  17. 2
      src/transformers/desugar-inherits.js
  18. 2
      src/transformers/desugar-interpolation-expressions.js
  19. 2
      src/transformers/mangle-identifiers.js
  20. 2
      src/transpile.js
  21. 10
      testers/transform.js
  22. 2
      tests/upstream-nix.js

2
samples/attrsets.nix

@ -5,4 +5,4 @@ rec {
"${b}s" = { f = 4; };
# FIXME: The below currently breaks desugar-attrsets
# ${c}.d = { g = 6; };
}
}.his.f

6
src/astformer/actions/consume-node.js

@ -1,6 +0,0 @@
"use strict";
// NOTE: This marker differs from RemoveNode in that it *doesn't* wipe out the state collected by the removed node; that is, it is assumed that the node is "consumed" and the stateLog is the result of that consumption. This is useful for various "meta-operations" which just serve to annotate some other operation with a modifier, and where the meta-operations themselves do not have any representation in the resulting query. In those cases, the meta-operation would be consumed and the parent node updated to reflect the modifier.
// FIXME: Check for existing places in optimizers where nodes are currently left lingering around, that should be consumed instead
module.exports = Symbol("ConsumeNode");

3
src/astformer/actions/no-change.js

@ -1,3 +0,0 @@
"use strict";
module.exports = Symbol("NoChange");

3
src/astformer/actions/remove-node.js

@ -1,3 +0,0 @@
"use strict";
module.exports = Symbol("RemoveNode");

20
src/astformer/combine-optimizers.js

@ -1,20 +0,0 @@
"use strict";
module.exports = function combineOptimizers(optimizers) {
let allVisitors = {};
for (let optimizer of optimizers) {
for (let [ key, visitor ] of Object.entries(optimizer.visitors)) {
if (allVisitors[key] == null) {
allVisitors[key] = [];
}
allVisitors[key].push({
name: optimizer.name,
func: visitor
});
}
}
return allVisitors;
};

16
src/astformer/create-debuggers.js

@ -1,16 +0,0 @@
"use strict";
const debug = require("debug");
module.exports = function createDebuggers(optimizers) {
let debuggers = {};
for (let optimizer of optimizers) {
debuggers[optimizer.name] = debug(`astformer:${optimizer.name}`);
debuggers[`${optimizer.name} (deferred)`] = debug(`astformer:${optimizer.name} (deferred)`);
}
debuggers["(subtree change)"] = debug(`astformer:(subtree change)`);
return debuggers;
};

27
src/astformer/handler-tracker.js

@ -1,27 +0,0 @@
"use strict";
module.exports = function createHandlerTracker() {
let handlers = new Map();
return {
add: function (name, func) {
if (!handlers.has(name)) {
handlers.set(name, []);
}
handlers.get(name).push(func);
},
call: function (name, value) {
let funcs = handlers.get(name);
if (funcs != null) {
for (let func of funcs) {
func(value);
}
}
},
has: function (name) {
return handlers.has(name);
}
};
};

361
src/astformer/index.js

@ -1,361 +0,0 @@
/* eslint-disable no-loop-func */
"use strict";
// Design note: We return stateLogs instead of passing in an object of registered handlers to call, because a node can become obsolete in mid-processing, and in those cases all of its state sets should be ignored. By far the easiest way to implement this, is to just keep a stateLog in the node handling context (since that entire context gets thrown away when processing gets aborted due to a subtree change), and let the parent deal with actually applying any still-relevant setStates to the correct handler functions.
// TODO: Figure out a way to track 'loss factor' per optimizer, ie. how many (partial or complete) node evaluations have been discarded due to the actions of that optimizer, including subtrees. This can give insight into which optimizers cause unreasonably much wasted work.
const util = require("util");
const splitFilter = require("split-filter");
const mapObj = require("fix-esm").require("map-obj").default;
const defaultValue = require("default-value");
const isPlainObj = require("fix-esm").require("is-plain-obj").default;
const findLast = require("find-last");
const NoChange = require("./actions/no-change");
const RemoveNode = require("./actions/remove-node");
const ConsumeNode = require("./actions/consume-node");
const typeOf = require("./util/type-of");
const concat = require("./util/concat");
const merge = require("./util/merge");
const measureTime = require("./util/measure-time");
const unreachable = require("@joepie91/unreachable")("jsnix");
const createHandlerTracker = require("./handler-tracker");
const createTimings = require("./timings-tracker");
const combineOptimizers = require("./combine-optimizers");
const createDebuggers = require("./create-debuggers");
const assureArray = require("assure-array");
const AnyChild = Symbol("AnyChild");
// FIXME: Implement a scope tracker of some sort, to decouple the code here a bit more
// TODO: Determine if we can improve performance by avoiding a lot of array allocations for the path tracking; by eg. nesting objects instead and unpacking it into an array on-demand
// FIXME: Verify that the various iterations=0 arguments are actually correct, and don't lose iteration count metadata
let EVALUATION_LIMIT = 10;
/**
 * Wraps `func` in a marker object that identifies it as a deferred visitor.
 *
 * @param {Function} func The function to run later.
 * @returns {{ __type: string, func: Function }} The marker, with `__type` set to "defer".
 */
function defer(func) {
	const marker = { __type: "defer", func };

	return marker;
}
/**
 * Runs `handleASTNode` over every child of `node` and reports what changed.
 *
 * Every property value of `node` is inspected: plain objects are passed to `handleASTNode`,
 * arrays are walked item by item (recursively), and anything else is left untouched.
 *
 * @param {Object} node The node whose properties are walked.
 * @param {Function} handleASTNode Called as `(child, 0, childPath, undefined, childContext)`;
 *   must return `{ node, stateLog }`.
 * @param {Array} path Path segments leading to `node`.
 * @param {Object} originalContext The context that `node` itself was evaluated with.
 * @param {Object} contextOverrides Context overrides for the children, as consumed by `mergeContexts`.
 * @returns {{ changedProperties: Object, stateLog: Array }} The properties whose value was
 *   replaced, and the combined (non-empty) state logs of all handled children.
 */
function handleNodeChildren(node, handleASTNode, path, originalContext, contextOverrides) {
	const childStateLogs = [];
	const changedProperties = {};

	// Walks an array; the original array is returned as-is unless at least one item was replaced.
	function transformArray(items, itemsPath, context) {
		let hasChanges = false;

		const transformedItems = items.map((item, index) => {
			const itemPath = itemsPath.concat([ { type: "$array", key: index } ]);
			const transformedItem = transformValue(item, itemPath, context);

			if (transformedItem !== item) {
				hasChanges = true;
			}

			return transformedItem;
		});

		return (hasChanges) ? transformedItems : items;
	}

	function transformValue(value, valuePath, context) {
		if (value == null) {
			return value;
		}

		if (isPlainObj(value)) {
			// Every plain object is treated as an AST node.
			// FIXME: Is it correct to not specify an initialStateLog here?
			const result = handleASTNode(value, 0, valuePath, undefined, context);

			if (result.stateLog.length > 0) {
				childStateLogs.push(result.stateLog);
			}

			return result.node;
		}

		if (Array.isArray(value)) {
			return transformArray(value, valuePath, context);
		}

		// Probably some kind of literal value; we don't touch these.
		return value;
	}

	// FIXME: Delete nulls?
	for (const [ property, value ] of Object.entries(node)) {
		const childPath = path.concat([ { type: node.type, key: property } ]);
		const childContext = mergeContexts(originalContext, contextOverrides, property);
		const transformedValue = transformValue(value, childPath, childContext);

		if (transformedValue !== value) {
			changedProperties[property] = transformedValue;
		}
	}

	return {
		changedProperties,
		stateLog: concat(childStateLogs)
	};
}
/**
 * Computes the context for the child stored under `property`.
 *
 * Overrides registered under `AnyChild` are applied first, then the overrides registered for
 * `property` specifically, so the property-specific ones win.
 *
 * @param {Object} oldContext The context of the parent node.
 * @param {Object} overrides Overrides keyed by property name and/or by `AnyChild`.
 * @param {string} property The property of the parent under which the child is stored.
 * @returns {Object} `oldContext` itself when no override applies, otherwise a new object.
 */
function mergeContexts(oldContext, overrides, property) {
	const forThisProperty = overrides[property];
	const forAnyChild = overrides[AnyChild];
	const hasOverrides = (forThisProperty != null || forAnyChild != null);

	if (!hasOverrides) {
		// No changes; hand back the very same object so that nothing is allocated needlessly.
		return oldContext;
	}

	return {
		... oldContext,
		... (forAnyChild ?? {}),
		... (forThisProperty ?? {})
	};
}
/**
 * Walks `ast` and applies the visitors of all `optimizers` to its nodes.
 *
 * Per node, the visitors registered for its `type` run first, then its children are handled, then
 * collected state is dispatched to registered state handlers, and finally any deferred visitors run.
 * Whenever a visitor (or a change in a child) produces a different node, handling of the current
 * node is aborted and the replacement is evaluated from scratch, up to EVALUATION_LIMIT times.
 *
 * @param {Object} ast The root node of the tree.
 * @param {Array} optimizers Objects with a `name` and a `visitors` table.
 * @returns {{ ast: Object, timings: Object }} The resulting root node, plus the measured time per
 *   optimizer name along with "# Total" and "# Walker overhead" entries.
 */
module.exports = function optimizeTree(ast, optimizers) {
let debuggers = createDebuggers(optimizers);
let visitors = combineOptimizers(optimizers);
let timings = createTimings(optimizers);
// For every visitor key, the visitors for that key followed by the wildcard ("*") visitors.
// NOTE(review): this presumably only yields entries for keys that exist in `visitors`; a node type without any visitor of its own then has no entry, and the `nodeVisitors != null` check further down would skip the "*" visitors for it too. Confirm whether that is intended.
let visitorsByType = mapObj(visitors, (key, value) => {
return [
key,
concat([
defaultValue(value, []),
defaultValue(visitors["*"], []),
])
];
});
// Handles a single node (and, recursively, its subtree). Returns `{ node, stateLog }`, where `node` may also be the RemoveNode or ConsumeNode marker.
function handleASTNode(node, iterations = 0, path = [], initialStateLog, context = {}) {
// console.log({ path: path.map((item) => String(item.type)).join(" -> "), context });
// console.log({ path: path.map((item) => String(item.key)).join(" -> "), context });
// console.log(path.map((item) => String(item.key)).join(" -> "));
// The stateLog contains a record of every setState call that was made during the handling of this node and its children. We keep a log for this rather than calling handlers directly, because setState calls should always apply to *ancestors*, not to the current node. That is, if the current node does a setState for `foo`, and also has a handler registered for `foo`, then that handler should not be called, but the `foo` handler in the *parent* node should be.
// FIXME: Scope stateLog entries by optimizer name? To avoid name clashes for otherwise similar functionality. Like when multiple optimizers track column names. Also do this for context values! Maybe an escape hatch to deliberately define/reference globals or keys for other optimizers.
let stateLog = [];
let contextOverrides = {};
let defers = [];
let handlers = createHandlerTracker();
let nodeVisitors = visitorsByType[node.type];
// Interprets the return value of a visitor. Returns undefined when handling of the current node should simply continue (NoChange, or a defer that was scheduled), and a `{ node, stateLog }` result when handling of the current node must be aborted (removed, consumed, or replaced and re-evaluated).
// NOTE(review): the `initialStateLog` destructured here shadows the one of handleASTNode; the calls from the visitor and defer loops do not pass it, so a replacement made by a visitor is re-evaluated without the outer initialStateLog. Confirm whether that is intended.
function handleResult({ debuggerName, result, permitDefer, initialStateLog }) {
if (result === NoChange) {
// no-op
} else if (result == null) {
// FIXME: Figure out a better way to indicate the origin of such an issue, than the current error message format?
// FIXME: Include information on which node this failed for
throw new Error(`[${debuggerName}] A visitor is not allowed to return null or undefined; if you intended to leave the node untouched, return a NoChange marker instead`);
} else if (result === RemoveNode) {
debuggers[debuggerName](`Node of type '${typeOf(node)}' removed`);
// Removal discards whatever state was collected for this node.
return { node: RemoveNode, stateLog: [] };
} else if (result === ConsumeNode) {
debuggers[debuggerName](`Node of type '${typeOf(node)}' consumed, but its stateLog was left intact`);
// The entries are tagged so that they can be carried over when an ancestor gets re-evaluated after a subtree change (see the `isFromConsumedNode` filter below).
stateLog.forEach((item) => { item.isFromConsumedNode = true; }); // NOTE: Mutates!
return { node: ConsumeNode, stateLog: stateLog };
} else if (result.__type === "defer") {
if (permitDefer) {
debuggers[debuggerName](`Defer was scheduled for node of type '${typeOf(node)}'`);
defers.push({ debuggerName, func: result.func });
} else {
throw new Error(`Cannot schedule a defer from within a defer handler`);
}
} else /*if (result.__raqbASTNode === true)*/ {
if (result === node) {
// Visitor returned the original node again; but in this case, it should return NoChange instead. We enforce this because after future changes to the optimizer implementation (eg. using an internally-mutable deep copy of the tree), we may no longer be able to *reliably* detect when the original node is returned; so it's best to already get people into the habit of returning a NoChange marker in those cases, by disallowing this.
throw new Error(`Visitor returned original node, but this may not work reliably; if you intended to leave the node untouched, return a NoChange marker instead`);
} else {
debuggers[debuggerName](`Node of type '${typeOf(node)}' replaced by node of type '${typeOf(result)}'`);
if (iterations >= EVALUATION_LIMIT) {
throw new Error(`Exceeded evaluation limit in optimizer ${debuggerName}; aborting optimization. If you are a user of this software, please report this as a bug. If you are a developer writing an optimizer, make sure that your optimizer eventually stabilizes on a terminal condition (ie. NoChange)!`);
} else {
// Start over with the replacement node, at the same path and with the same context.
return handleASTNode(result, iterations + 1, path, initialStateLog, context);
}
}
// } else {
// throw new Error(`Visitor returned an unexpected type of return value: ${util.inspect(result)}`);
}
}
// Dispatches state log entries: entries for which this node registered a handler are passed to that handler, all other entries are appended to this node's own stateLog (which is what gets returned to the parent).
function handleStateLog(newStateLog) {
let [ relevantState, otherState ] = splitFilter(newStateLog, (entry) => handlers.has(entry.name));
stateLog = stateLog.concat(otherState);
for (let item of relevantState) {
// FIXME: Log these, and which visitor they originate from
handlers.call(item.name, item.value);
}
}
// Calls one visitor function with the node and the visitor API object, adds the time spent to `timings[visitorName]`, and returns whatever the visitor returned.
function applyVisitorFunction({ visitorName, func, node, permitDefer }) {
let { value: result, time } = measureTime(() => {
return func(node, {
// eslint-disable-next-line no-loop-func
setState: (name, value) => {
// FIXME: util.inspect is slow, and not necessary when debug mode is disabled
debuggers[visitorName](`Setting state for '${name}' from node of type '${typeOf(node)}': ${util.inspect(value, { colors: true })}`);
stateLog.push({ name, value });
},
registerStateHandler: (name, func) => handlers.add(name, func),
// `defer` is null when deferring is not permitted (ie. when already running a deferred visitor).
defer: (permitDefer === true) ? defer : null,
// Returns the last path segment with the given type, or simply the last path segment when no type is given.
findNearestStep: function (type) {
return (type != null)
? findLast(path, (item) => item.type === type)
: path[path.length - 1];
},
// Sets a context value for the given child properties, or for all children when `children` is null/undefined.
setContext: (children, key, value) => {
// FIXME: Turn this into an abstraction
// FIXME: Disallow this once we are in a `defer`; using it there is a bug, as child nodes have already been processed, and so context cannot be propagated to them anymore. Should throw an error telling the user that they probably have a bug in their code.
function setOne(child, key, value) {
if (contextOverrides[child] == null) {
contextOverrides[child] = {};
}
contextOverrides[child][key] = value;
}
if (children != null) {
assureArray(children).forEach((child) => {
setOne(child, key, value);
});
} else {
setOne(AnyChild, key, value);
}
},
getContext: (key) => {
// NOTE: We *do not* consider contextOverrides here. A node cannot set context for itself, only for its children. Instead, contextOverrides gets handled when passing a new context object to a child node upon its evaluation.
if (context[key] != null) {
return context[key];
} else {
throw new Error(`No key '${key}' exists in the context here`);
}
}
});
});
timings[visitorName] += time;
return result;
}
// Step 1: run the visitors for this node type, before any children are handled.
if (nodeVisitors != null) {
for (let visitor of nodeVisitors) {
let handled = handleResult({
debuggerName: visitor.name,
result: applyVisitorFunction({
visitorName: visitor.name,
func: visitor.func,
node: node,
permitDefer: true
}),
permitDefer: true
});
if (handled != null) {
// Handling of the current node was aborted
return handled;
}
}
}
// Step 2: handle the children; if any of them changed, build a new node and re-evaluate that instead.
let childResult = handleNodeChildren(node, handleASTNode, path, context, contextOverrides);
if (Object.keys(childResult.changedProperties).length > 0) {
let newNode = merge(node, childResult.changedProperties);
// We already know that the new node is a different one, but let's just lead it through the same handleResult process, for consistency. Handling of the pre-child-changes node is aborted here, and we re-evaluate with the new node.
let reevaluatedResult = handleResult({
debuggerName: "(subtree change)",
result: newNode,
permitDefer: false,
// NOTE: If we have any leftover state from nodes that were consumed upstream, we should make sure to include this in the reevaluation, even when the subtree was replaced!
initialStateLog: (childResult.stateLog.length > 0)
? childResult.stateLog.filter((item) => item.isFromConsumedNode)
: undefined
});
return reevaluatedResult;
}
// Step 3: the children are stable; dispatch the collected state.
if (initialStateLog != null) {
// NOTE: We intentionally process the initialStateLog here and not earlier; that way it is consistent with how any retained stateLog entries *would* have executed on the node before it got replaced (ie. after evaluation of the children). Conceptually you can think of it as the initialStateLog being prefixed to the stateLog of the childResult.
handleStateLog(initialStateLog);
}
if (childResult.stateLog.length > 0) {
handleStateLog(childResult.stateLog);
}
// Step 4: run the deferred visitors, in the order in which they were scheduled.
for (let defer of defers) {
let handled = handleResult({
debuggerName: `${defer.debuggerName} (deferred)`,
result: applyVisitorFunction({
visitorName: defer.debuggerName,
func: defer.func,
node: node,
permitDefer: false
}),
permitDefer: false
});
if (handled != null) {
// Handling of the current node was aborted
return handled;
}
}
// Nothing changed (anymore); the node is final.
return {
stateLog: stateLog,
node: node
};
}
let { value: rootResult, time } = measureTime(() => {
return handleASTNode(ast);
});
let timeSpentInOptimizers = Object.values(timings).reduce((sum, n) => sum + n, 0);
if (rootResult.node !== RemoveNode && rootResult.node !== ConsumeNode) {
return {
ast: rootResult.node,
timings: {
"# Total": time,
// Whatever part of the total time was not spent inside visitor functions.
"# Walker overhead": time - timeSpentInOptimizers,
... timings,
}
};
} else {
unreachable("Root node was removed");
}
};

12
src/astformer/timings-tracker.js

@ -1,12 +0,0 @@
"use strict";
module.exports = function createTimings(optimizers) {
let timings = {};
for (let optimizer of optimizers) {
// timings[optimizer.name] = 0n;
timings[optimizer.name] = 0;
}
return timings;
};

11
src/astformer/util/concat.js

@ -1,11 +0,0 @@
"use strict";
module.exports = function concat(arrays) {
if (arrays.length === 0) {
return [];
} else if (arrays.length === 1) {
return arrays[0];
} else {
return arrays[0].concat(... arrays.slice(1));
}
};

21
src/astformer/util/measure-time.js

@ -1,21 +0,0 @@
"use strict";
// FIXME: Replace with `time-call` package
/**
 * Converts a `[ seconds, nanoseconds ]` pair into a single number of nanoseconds.
 *
 * @param {Array<number>} time The pair to convert.
 * @returns {number} The total in nanoseconds.
 */
function hrtimeToNanoseconds(time) {
	const NANOSECONDS_PER_SECOND = 1e9;
	const seconds = time[0];
	const nanoseconds = time[1];

	// If the numbers here become big enough to cause loss of precision, we probably have bigger issues than numeric precision...
	return (seconds * NANOSECONDS_PER_SECOND) + nanoseconds;
}
module.exports = function measureTime(func) {
// let startTime = process.hrtime.bigint();
let startTime = hrtimeToNanoseconds(process.hrtime());
let result = func();
// let endTime = process.hrtime.bigint();
let endTime = hrtimeToNanoseconds(process.hrtime());
return {
value: result,
time: (endTime - startTime)
};
};

5
src/astformer/util/merge.js

@ -1,5 +0,0 @@
"use strict";
module.exports = function merge(... items) {
return Object.assign({}, ... items);
};

12
src/astformer/util/type-of.js

@ -1,12 +0,0 @@
"use strict";
module.exports = function typeOf(value) {
// FIXME: Better check
if (value == null) {
return null;
} else if (typeof value === "object") {
return value.type;
} else {
return null;
}
};

65
src/evaluate.js

@ -1,13 +1,74 @@
"use strict";
const measureTime = require("./astformer/util/measure-time");
const measureTime = require("astformer/util/measure-time"); // FIXME
const transpile = require("./transpile");
module.exports = function evaluate(nixCode) {
let transpiled = transpile(nixCode);
function lazyWrap(func) {
return () => func;
}
const builtins = {
seq: lazyWrap(($a) => ($b) => {
// First evaluate the first argument...
$a();
// ... then evaluate and return the second argument.
return $b();
}),
splitVersion: lazyWrap(($version) => {
let version = $version();
// FIXME: assert string
let parts = [];
let currentPart = "";
let isNumber = null;
function finalizePart() {
if (currentPart !== "") {
// NOTE: Numbers get added to the list as strings anyway. This is really weird considering `nix-env -u`s comparison logic, but it's how upstream Nix works too.
parts.push(currentPart);
currentPart = "";
isNumber = null;
}
}
// FIXME: Is it correct to assume that only the ASCII character set is supported here?
// TODO: Replace this with a proper parser some day
for (let i = 0; i < version.length; i++) {
let code = version.charCodeAt(i);
if (code >= 48 && code <= 57) {
// Digit
if (isNumber !== true) {
finalizePart();
isNumber = true;
}
currentPart += version[i];
} else if ((code >= 65 && code <= 90) || (code >= 97 && code <= 122)) {
// Letter (uppercase and lowercase respectively)
if (isNumber !== false) {
finalizePart();
isNumber = false;
}
currentPart += version[i];
} else {
finalizePart();
}
}
finalizePart();
return parts;
})
};
const api = {
builtins: {},
builtins: () => builtins,
$memoize: function (func) {
let isCalled = false;
let storedResult;

29
src/transformers/attribute-sets.js

@ -3,9 +3,10 @@
const assert = require("assert");
const types = require("@babel/types");
const template = require("@babel/template").default;
const splitFilter = require("split-filter");
const templateExpression = require("./util/template-expression");
const NoChange = require("../astformer/actions/no-change");
const NoChange = require("astformer/actions/no-change"); // FIXME
const lazyWrapper = require("./templates/lazy-wrapper");
const callLazyWrapper = require("./templates/call-lazy-wrapper");
const objectLiteral = require("./templates/object-literal");
@ -35,20 +36,21 @@ let tmplScopeWrapper = templateExpression(`(
let tmplDynamicScopeWrapper = templateExpression(`(
(() => {
%%keyAssertion%%;
let $attributes = {};
with ($attributes) {
/* Static and overrides */
Object.assign($attributes, {
%%bindings%%;
});
Object.assign($attributes, %%staticBindings%%);
%%keyAssertion%%;
/* Dynamic bindings */
Object.assign($attributes, %%dynamicBindings%%);
}
return $attributes;
})()
)`);
)`, { strictMode: false });
// FIXME: Verify that this always works, and that we don't need `var` for hoisting!
let tmplRecursiveBinding = template(`
@ -67,7 +69,7 @@ function objectNormal(bindings) {
})),
keyAssertion: bindings.some((binding) => typeof binding.name !== "string")
// Only needed when dealing with dynamic keys
? assertKeys(bindings.map(({ name }) => implicitStringLiteral(name)))
? assertKeys(bindings)
: null
});
}
@ -88,9 +90,13 @@ function objectRecursiveStatic(bindings) {
}
function objectRecursiveDynamic(bindings) {
throw new Error(`UNIMPLEMENTED: Dynamic bindings are not supported yet`);
// NOTE: We assign static bindings first and then in a separate pass the dynamic bindings; that way, the dynamic bindings' keys can refer to values set in the static binding pass. For the same reason, we only check duplicate keys *after* setting up the static bindings.
let [ staticBindings, dynamicBindings ] = splitFilter(bindings, (binding) => typeof binding.name === "string");
return tmplDynamicScopeWrapper({
keyAssertion: assertKeys(bindings),
staticBindings: objectLiteral(staticBindings.map(({ name, expression }) => [ name, expression ])),
dynamicBindings: objectLiteral(dynamicBindings.map(({ name, expression }) => [ name, expression ]))
});
}
@ -102,7 +108,8 @@ function implicitStringLiteral(node) {
}
}
function assertKeys(keys) {
function assertKeys(bindings) {
let keys = bindings.map(({ name }) => implicitStringLiteral(name));
return tmplAssertKeys({ keyList: types.arrayExpression(keys) });
}

2
src/transformers/desugar-attrsets.js

@ -2,7 +2,7 @@
const unreachable = require("@joepie91/unreachable")("jsNix");
const NoChange = require("../astformer/actions/no-change");
const NoChange = require("astformer/actions/no-change"); // FIXME
const { NixAttributeIdentifier, NixAttributeSet, NixBinding } = require("./util/nix-types");

2
src/transformers/desugar-inherits.js

@ -3,7 +3,7 @@
const splitFilter = require("split-filter");
const assert = require("assert");
const NoChange = require("../astformer/actions/no-change");
const NoChange = require("astformer/actions/no-change"); // FIXME
const nixTypes = require("./util/nix-types");
module.exports = {

2
src/transformers/desugar-interpolation-expressions.js

@ -1,6 +1,6 @@
"use strict";
const NoChange = require("../astformer/actions/no-change");
const NoChange = require("astformer/actions/no-change"); // FIXME
module.exports = {
name: "desugar-interpolation-expressions",

2
src/transformers/mangle-identifiers.js

@ -1,6 +1,6 @@
"use strict";
const NoChange = require("../astformer/actions/no-change");
const NoChange = require("astformer/actions/no-change"); // FIXME
const mangleName = require("../mangle-name");
function mangleNode(node) {

2
src/transpile.js

@ -1,6 +1,6 @@
"use strict";
const astformer = require("./astformer");
const astformer = require("astformer");
const parse = require("./parse");
const printAST = require("./print-ast");

10
testers/transform.js

@ -4,11 +4,17 @@ const fs = require("fs");
const assert = require("assert");
const parse = require("../src/parse");
const astformer = require("../src/astformer");
const astformer = require("astformer");
const transformers = require("../src/transformers");
const printAST = require("../src/print-ast");
assert(process.argv[2] != null);
let tree = parse(fs.readFileSync(process.argv[2], "utf8"));
for (let i = 0; i < 10000; i++) {
astformer(tree, transformers);
}
let transformed = astformer(tree, transformers);
printAST(transformed);
printAST(transformed.ast);
console.log(transformed.timings);

2
tests/upstream-nix.js

@ -21,6 +21,8 @@ let tests = fs.readdirSync(testsPath)
function formatResultNode(node) {
if (typeof node === "string") {
return `"${node.replace(/"/g, '\\"')}"`;
} else if (Array.isArray(node)) {
return `[ ${node.map(formatResultNode).join(" ")} ]`;
} else {
return node.toString();
}

Loading…
Cancel
Save