WIP
parent
6985a0dc47
commit
8bbd02eec8
@ -0,0 +1,58 @@
|
||||
"use strict";
|
||||
|
||||
const pickRandomWeighted = require("pick-random-weighted");
|
||||
const syncpipe = require("syncpipe");
|
||||
|
||||
let columns = {
|
||||
color: [ "blue", "black", "brown", "red", "yellow", "gray", "white", "pink", "purple" ],
|
||||
size: [ "XS", "S", "M", "L", "XL", "XXL" ],
|
||||
country_id: [ 1, 2, 3, 4, 5 ],
|
||||
store_id: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
|
||||
price: [ 2, 5.50, 12, 20, 22.50 ]
|
||||
};
|
||||
|
||||
/**
 * Shuffles an array in place using the Fisher-Yates algorithm.
 *
 * The previous implementation sorted with a random comparator
 * (`sort(() => Math.random() - 0.5)`); such a comparator is inconsistent, so
 * the resulting order is implementation-defined and not uniformly distributed.
 * Fisher-Yates gives every permutation the same probability (as far as
 * `Math.random` allows) in a single linear pass.
 *
 * @param {Array} arrayReference - The array to shuffle; it is mutated.
 * @returns {undefined} Nothing; the caller's array is reordered in place.
 */
function shuffle(arrayReference) {
	// Walk from the end, swapping each slot with a random slot at or before it.
	for (let i = arrayReference.length - 1; i > 0; i--) {
		let j = Math.floor(Math.random() * (i + 1));
		let swapped = arrayReference[i];

		arrayReference[i] = arrayReference[j];
		arrayReference[j] = swapped;
	}
}
|
||||
|
||||
// Re-randomizes, in place, the order of every value list stored in `columns`.
function shuffleAllColumns() {
	Object.keys(columns).forEach((columnName) => {
		shuffle(columns[columnName]);
	});
}
|
||||
|
||||
let printColumns = Object.keys(columns);
|
||||
// Writes one generated row to stdout as a single semicolon-separated line,
// with the cells in the same order as `printColumns`.
function printItem(item) {
	let cells = printColumns.map((column) => item[column]);

	console.log(cells.join(";"));
}
|
||||
|
||||
console.log(printColumns.join(";"));
|
||||
|
||||
// Emits 200 batches of 1000 rows each. The columns are reshuffled before every
// batch, so each batch favours a different set of values.
for (let batch = 0; batch < 200; batch++) {
	shuffleAllColumns();

	// We do this because we *don't* want an even distribution, to make the data more realistic.
	// Each value is paired with its (post-shuffle) index; presumably pick-random-weighted
	// treats the second element of each pair as the weight -- confirm against its docs.
	let weightedColumns = Object.entries(columns).map(([ key, values ]) => [
		key,
		values.map((value, position) => [ value, position ])
	]);

	for (let row = 0; row < 1000; row++) {
		let data = syncpipe(weightedColumns, [
			(_) => _.map(([ key, values ]) => [ key, pickRandomWeighted(values) ]),
			(_) => Object.fromEntries(_)
		]);

		printItem(data);
	}
}
|
||||
|
@ -0,0 +1,254 @@
|
||||
"use strict";
|
||||
|
||||
const util = require("util");
|
||||
const syncpipe = require("syncpipe");
|
||||
const debug = require("debug");
|
||||
const defaultValue = require("default-value");
|
||||
const mapObj = require("map-obj");
|
||||
|
||||
const NoChange = require("../../optimizers/util/no-change");
|
||||
const RemoveNode = require("../../optimizers/util/remove-node");
|
||||
const typeOf = require("../../type-of");
|
||||
const unreachable = require("../../unreachable");
|
||||
const measureTime = require("../../measure-time");
|
||||
const concat = require("../../concat");
|
||||
|
||||
// FIXME: Consider deepcopying the tree once, and then mutating that tree, instead of doing everything immutably; this might be significantly faster when a few iterations are needed to stabilize the tree, as that might otherwise result in many copies of the subtree(s) leading up to the changed node(s), one for each iteration.
|
||||
// FIXME: Consider whether inverting the evaluation order (deepest-first rather than shallowest-first) can remove the need for multiple optimization passes and stabilization detection.
|
||||
// FIXME: Verify that changed nodes actually result in a change in where the walker goes!
|
||||
|
||||
// Builds a lookup from optimizer name to a `debug` logger in the
// `raqb:ast:optimize:<name>` namespace, one per optimizer.
function createDebuggers(optimizers) {
	let loggersByName = {};

	[ ...optimizers ].forEach(({ name }) => {
		loggersByName[name] = debug(`raqb:ast:optimize:${name}`);
	});

	return loggersByName;
}
|
||||
|
||||
// Builds a lookup from optimizer name to accumulated time, with every entry
// starting at the number 0 (a BigInt `0n` start value was tried and left out).
function createTimings(optimizers) {
	return [ ...optimizers ].reduce((totals, { name }) => {
		totals[name] = 0;

		return totals;
	}, {});
}
|
||||
|
||||
// Merges the `visitors` maps of all optimizers into one object, keyed by
// visitor key. Each key holds an array of `{ name, func }` entries, in the
// order in which the optimizers (and their visitors) were supplied.
function combineOptimizers(optimizers) {
	let visitorsByKey = {};

	for (let optimizer of optimizers) {
		Object.entries(optimizer.visitors).forEach(([ key, visitor ]) => {
			let bucket = visitorsByKey[key];

			// First visitor seen for this key: start a fresh list.
			if (bucket == null) {
				bucket = [];
				visitorsByKey[key] = bucket;
			}

			bucket.push({ name: optimizer.name, func: visitor });
		});
	}

	return visitorsByKey;
}
|
||||
|
||||
// FIXME: StopMatching marker to signal that eg. a generic visitor should no longer match after a specific one?
|
||||
// FIXME: OriginalNode marker to explicitly indicate that any transformations applied by *other* visitors should be thrown out?
|
||||
|
||||
// Wraps a function in a marker object tagged `__type: "defer"`; the tree walker
// recognizes this tag when a visitor returns it.
function defer(func) {
	let marker = { __type: "defer", func };

	return marker;
}
|
||||
|
||||
module.exports = function optimizeTree(ast, optimizers) {
|
||||
// NOTE: Depth-first!
|
||||
let visitors = combineOptimizers(optimizers);
|
||||
let timings = createTimings(optimizers);
|
||||
let debuggers = createDebuggers(optimizers);
|
||||
// FIXME: Dirty tracking for stabilization detection
|
||||
|
||||
let visitorsByType = mapObj(visitors, (key, value) => {
|
||||
return concat([
|
||||
defaultValue(visitors[key], []),
|
||||
defaultValue(visitors["*"], []),
|
||||
]);
|
||||
});
|
||||
|
||||
function handle(node/*, parentStateHandlers*/) {
|
||||
let deferFuncs = [];
|
||||
// let stateHandlers = Object.create(parentStateHandlers);
|
||||
let stateLog = [];
|
||||
|
||||
function registerStateHandler(name, func) {
|
||||
stateHandlers[name] = function (value) {
|
||||
// FIXME: test setParentState
|
||||
return func(value, { setState: setParentState });
|
||||
};
|
||||
}
|
||||
|
||||
function setParentState(name, value) {
|
||||
if (parentStateHandlers[name] != null) {
|
||||
parentStateHandlers[name](value);
|
||||
}
|
||||
}
|
||||
|
||||
function setState(name, value) {
|
||||
stateLog.push({ type: "set", name, value });
|
||||
// if (stateHandlers[name] != null) {
|
||||
// stateHandlers[name](value);
|
||||
// }
|
||||
}
|
||||
|
||||
function resetAllListeners() {
|
||||
// Called whenever a node is invalidated (replaced, removed, ...) and therefore no longer has a reason to receive any kind of callbacks
|
||||
deferFuncs = [];
|
||||
// stateHandlers = Object.create(parentStateHandlers);
|
||||
stateLog.push({ type: "reset" });
|
||||
}
|
||||
|
||||
function applyVisitors(node) {
|
||||
let nodeVisitors = visitorsByType[node.type];
|
||||
|
||||
/*
|
||||
run each visitor on the node, until either
|
||||
a) the node is invalidated (removed, substituted)
|
||||
b) the list of visitors is finished
|
||||
|
||||
then process all children
|
||||
|
||||
then
|
||||
*/
|
||||
|
||||
if (nodeVisitors == null) {
|
||||
return node;
|
||||
} else {
|
||||
let lastNode = node;
|
||||
|
||||
for (let visitor of nodeVisitors) {
|
||||
// eslint-disable-next-line no-loop-func
|
||||
let { value: result, time } = measureTime(() => {
|
||||
return visitor.func(lastNode, { registerStateHandler, setState, defer });
|
||||
});
|
||||
// FIXME: Re-evaluate children after presence in a new subtree? Is this necessary for making sure the context tree is correct?
|
||||
|
||||
timings[visitor.name] += time;
|
||||
|
||||
if (result === NoChange) {
|
||||
// no-op
|
||||
} else if (result == null) {
|
||||
throw new Error(`A visitor is not allowed to return null or undefined; if you intended to leave the node untouched, return a NoChange marker instead`);
|
||||
} else if (result === RemoveNode) {
|
||||
debuggers[visitor.name](`Node of type '${typeOf(lastNode)}' removed`);
|
||||
lastNode = RemoveNode;
|
||||
resetAllListeners();
|
||||
break; // Node has gone stale, stop applying visitors to it
|
||||
} else if (result.__type === "defer") {
|
||||
deferFuncs.push(result.func);
|
||||
continue;
|
||||
} else if (result.__raqbASTNode === true) {
|
||||
// New subtree to replace the old one
|
||||
if (result === node) {
|
||||
// Visitor returned the original node again; but in this case, it should return NoChange instead. We enforce this because after future changes to the optimizer implementation (eg. using an internally-mutable deep copy of the tree), we may no longer be able to *reliably* detect when the original node is returned; so it's best to already get people into the habit of returning a NoChange marker in those cases, by disallowing this.
|
||||
throw new Error(`Visitor returned original node, but this may not work reliably; if you intended to leave the node untouched, return a NoChange marker instead`);
|
||||
} else {
|
||||
debuggers[visitor.name](`Node of type '${typeOf(lastNode)}' replaced by node of type '${typeOf(result)}'`);
|
||||
lastNode = result;
|
||||
resetAllListeners();
|
||||
break; // Node has gone stale, stop applying visitors to it
|
||||
}
|
||||
} else {
|
||||
throw new Error(`Visitor returned an unexpected type of return value: ${util.inspect(result)}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (lastNode !== node) {
|
||||
// We re-evalue the new node before leaving control to the children handler, as the old one has been substituted, and therefore new visitors might be applicable.
|
||||
// FIXME: Is RemoveNode getting handled correctly here?
|
||||
// FIXME: This needs to be moved outside of this function to also re-apply other visitors correctly
|
||||
return applyVisitors(lastNode);
|
||||
} else {
|
||||
return lastNode;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node = applyVisitors(node);
|
||||
|
||||
|
||||
// FIXME: Eventually hardcode the available properties for different node types (and them being single/multiple), for improved performance?
|
||||
let changedProperties = {};
|
||||
|
||||
for (let [ property, value ] of Object.entries(node)) {
|
||||
if (value == null) {
|
||||
continue;
|
||||
} else if (value.__raqbASTNode === true) {
|
||||
let newValue = handle(value, stateHandlers);
|
||||
|
||||
if (newValue !== value) {
|
||||
changedProperties[property] = newValue;
|
||||
}
|
||||
} else if (Array.isArray(value) && value.length > 0 && value[0].__raqbASTNode === true) {
|
||||
// NOTE: We assume that if an array in an AST node property contains one AST node, *all* of its items are AST nodes. This should be ensured by the input wrapping in the operations API.
|
||||
// eslint-disable-next-line no-loop-func
|
||||
let newValues = value.map((item) => handle(item, stateHandlers));
|
||||
|
||||
if (newValues.some((newValue, i) => newValue !== value[i])) {
|
||||
changedProperties[property] = newValues.filter((item) => item !== RemoveNode);
|
||||
}
|
||||
} else {
|
||||
// Probably some kind of literal value; we don't touch these.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (Object.keys(changedProperties).length === 0) {
|
||||
return node;
|
||||
} else {
|
||||
let newNode = Object.assign({}, node, changedProperties);
|
||||
|
||||
// FIXME: Think carefully about whether there is *ever* a valid reason to remove a single node! As array items are already taken care of above, and leave an empty array at worst, which can make sense. Possibly we even need to encode this data into node type metadata.
|
||||
for (let [ key, value ] of Object.entries(newNode)) {
|
||||
if (value === RemoveNode) {
|
||||
delete newNode[key];
|
||||
}
|
||||
}
|
||||
|
||||
return newNode;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// FIXME: Possibly optimize the "node gets returned unchanged" case, somehow? Perhaps by propagating the NoChange marker? But object creation is fast, so that may actually make things slower than just blindly creating new objects...
|
||||
// return syncpipe(node, [
|
||||
// (_) => applyVisitors(_),
|
||||
// (_) => handleChildren(_)
|
||||
// ]);
|
||||
}
|
||||
|
||||
let { value: rootNode, time } = measureTime(() => {
|
||||
return handle(ast, {});
|
||||
});
|
||||
|
||||
// let timeSpentInOptimizers = Object.values(timings).reduce((sum, n) => sum + n, 0n);
|
||||
let timeSpentInOptimizers = Object.values(timings).reduce((sum, n) => sum + n, 0);
|
||||
|
||||
if (rootNode !== RemoveNode) {
|
||||
return {
|
||||
ast: rootNode,
|
||||
timings: {
|
||||
"# Total": time,
|
||||
"# Walker overhead": time - timeSpentInOptimizers,
|
||||
... timings,
|
||||
}
|
||||
};
|
||||
} else {
|
||||
unreachable("Root node was removed");
|
||||
}
|
||||
};
|
@ -0,0 +1,85 @@
|
||||
"use strict";
|
||||
/* eslint-disable no-undef */
|
||||
|
||||
/*
|
||||
SELECT
|
||||
reviews.movie_id,
|
||||
movie.title AS title,
|
||||
COUNT(reviews.*) AS positive_review_count
|
||||
FROM reviews
|
||||
JOIN
|
||||
movies AS movie ON reviews.movie_id = movie.id
|
||||
WHERE
|
||||
reviews.rating > 3
|
||||
AND movie.title LIKE '%Movie%'
|
||||
GROUP BY (reviews.movie_id, movie.id);
|
||||
*/
|
||||
query = select("reviews", [
|
||||
define({ movie: belongsTo("movie_id") }),
|
||||
where({
|
||||
rating: moreThan(3),
|
||||
movie: { title: includes("Movie") }
|
||||
}),
|
||||
collapseBy("movie_id", [
|
||||
compute({ positive_review_count: count() }),
|
||||
renameColumn("movie.title", "title")
|
||||
])
|
||||
]);
|
||||
|
||||
|
||||
/*
|
||||
SELECT
|
||||
movies.*,
|
||||
COUNT(reviews_1.*) AS positive_review_count
|
||||
FROM movies
|
||||
LEFT JOIN
|
||||
reviews AS reviews_1 ON
|
||||
movies.id = reviews_1.movie_id
|
||||
AND reviews_1.rating > 3
|
||||
GROUP BY movies.id;
|
||||
|
||||
NOTES:
|
||||
- Must be a LEFT JOIN, to ensure that the rows from the parent table are always present
|
||||
- Foreign predicates are in the ON, not the WHERE; that way they filter the foreign inputs, not the JOIN output?
|
||||
MARKER: Continue fleshing out the mental model for all these different features, and how they translate to SQL. Also figure out where *non*-relation virtual tables fit into the picture, with aggregates and such - especially whether they also require the primary-key GROUP BY.
|
||||
*/
|
||||
query = select("movies", [
|
||||
compute({ // better name for withDerived?
|
||||
positive_review_count: count(has("reviews.movie_id", [
|
||||
where({ rating: moreThan(3) })
|
||||
]))
|
||||
})
|
||||
]);
|
||||
|
||||
query = select("movies", [
|
||||
define({ reviews_1: has("reviews.movie_id", [
|
||||
where({ rating: moreThan(3) })
|
||||
])}),
|
||||
compute({ // better name for withDerived?
|
||||
positive_review_count: count("reviews_1")
|
||||
})
|
||||
]);
|
||||
|
||||
|
||||
/*
|
||||
SELECT
|
||||
movies.*,
|
||||
COUNT(reviews_1.*) AS positive_review_count
|
||||
FROM movies
|
||||
JOIN
|
||||
reviews AS reviews_1 ON movies.id = reviews_1.movie_id
|
||||
WHERE reviews_1.rating > 3
|
||||
GROUP BY movies.id;
|
||||
*/
|
||||
query = select("movies", [
|
||||
define({
|
||||
releases: has("releases.movie_id"),
|
||||
positive_reviews: has("reviews.movie_id", [
|
||||
where({ rating: moreThan(3) })
|
||||
])
|
||||
}),
|
||||
compute({ // better name for withDerived?
|
||||
positive_review_count: count("positive_reviews"),
|
||||
initial_release_year: lowestOf("releases.year")
|
||||
})
|
||||
]);
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,434 @@
|
||||
"use strict";
|
||||
|
||||
// JOIN
|
||||
query = combine([
|
||||
select("nodes", [
|
||||
where({ id: parameter("id") })
|
||||
]),
|
||||
select("node_revisions", [
|
||||
where({ node_id: foreignColumn("nodes.id") })
|
||||
])
|
||||
]);
|
||||
|
||||
// UNION
|
||||
let whereClause = { id: parameter("id") };
|
||||
|
||||
query = concatenate([
|
||||
select("old_nodes", [ whereClause ]),
|
||||
select("new_nodes", [ whereClause ]),
|
||||
]);
|
||||
|
||||
// GROUP BY / aggregates
|
||||
query = select("weather_reports", [
|
||||
where({ city: startsWith("S") }),
|
||||
collapseBy("city", [
|
||||
compute({ maximum_temperature: highestOf("temperature") }),
|
||||
where({ maximum_temperature: lessThan(40) })
|
||||
])
|
||||
]);
|
||||
|
||||
query = select("reviews", [
|
||||
where({ rating: moreThan(3) }),
|
||||
collapseBy([ "movie_source", "movie_id" ], [
|
||||
compute({ positive_review_count: count() })
|
||||
])
|
||||
]);
|
||||
|
||||
// movies: data_source, data_id, title, PRIMARY(data_source, data_id)
|
||||
// reviews: id, movie_source, movie_id, rating, FOREIGN(movie_source, movie_id) -> movies .data_source, .data_id
|
||||
// intended output: [ title, count ] for each movie whose title starts with an A
|
||||
|
||||
// NOTE: Conceptually, a relation is a 'virtual table' that references all the associated foreign rows for a given local row
|
||||
|
||||
// For something like the below, actually return the `movie.title` field as a column named exactly that by default; using quotes to be able to include the dot. Nevertheless the user might want to *rename* certain columns in a constrained query like a collapseBy query, where the columns are normally automatically determined based on the specified computations and grouping conditions; neither addColumns nor onlyColumns make semantic sense for that (the latter should be reserved for removing certain columns from the results, and *should* work with collapseBy, just with constrained valid column names!), so we probably need a renameColumn construct that gets rid of the original `movie.title` column and eg. replaces it with `movie_title`.
|
||||
// FIXME: Think carefully about how the semantics of belongsTo would change when used in defineAs rather than withRelations. Ideally the semantics shouldn't change at all. However, if it's also possible for someone to specify a `has` relation (which points at *multiple* foreign rows), then we need a way to deal with zero-or-more rows here anyway, maybe that can be the same semantics as in withRelations?
|
||||
// I guess that "there must be at least one foreign row that matches the specified constraints" is a reasonable criterion for whether the local row is included or not? Then there should probably be a LIMIT 1 on the foreign query, to ensure that it only ever uses the values of whatever the first-encountered row is. This means there would be undefined behaviour when referring to variable fields, though.
|
||||
// NOTE: Make sure that any design around this is also capable of dealing with composite keys! Maybe require a composite(...) wrapper for those, to distinguish from other array-ish things...
|
||||
// Definitely need this to distinguish from GROUPING SETS in GROUP BY (which is also an array); otherwise ["brand", "size"] would be ambiguous.
|
||||
|
||||
/*
|
||||
SELECT
|
||||
reviews.movie_id,
|
||||
COUNT(reviews.*) AS positive_review_count
|
||||
FROM reviews
|
||||
JOIN
|
||||
movies ON reviews.movie_id = movies.id
|
||||
WHERE
|
||||
reviews.rating > 3
|
||||
GROUP BY reviews.movie_id;
|
||||
*/
|
||||
query = select("reviews", [
|
||||
define({ movie: belongsTo("movie_id") }),
|
||||
where({
|
||||
rating: moreThan(3),
|
||||
movie: { title: includes("Movie") }
|
||||
}),
|
||||
collapseBy("movie_id", [
|
||||
compute({ positive_review_count: count() }),
|
||||
renameColumn("movie.title", "title")
|
||||
])
|
||||
]);
|
||||
|
||||
// FIXME: What if we were to collapse by ID and then want to also get the movie title?
|
||||
|
||||
// reviews: id, movie_id, rating
|
||||
// movies: id, title, release_year
|
||||
query = select("movies", [
|
||||
define({ reviews: has("reviews.movie_id") }),
|
||||
where({
|
||||
title: startsWith("A"),
|
||||
reviews: { rating: moreThan(3) }
|
||||
}),
|
||||
collapseBy("release_year", [
|
||||
compute({ positive_review_count: count("reviews") }), // FIXME: Ambiguity between table and column names here, maybe just default to table? Since column counts are rare
|
||||
])
|
||||
]);
|
||||
|
||||
// The below is functionally identical to the above; it is generally permitted to specify WHERE constraints for `define`d relations in the top-level WHERE
|
||||
query = select("movies", [
|
||||
define({ reviews: has("reviews.movie_id", [ where({ rating: moreThan(3) }) ]) }),
|
||||
where({ title: contains("Movie") }),
|
||||
collapseBy("release_year", [
|
||||
compute({ positive_review_count: count("reviews") }), // FIXME: Ambiguity between table and column names here, maybe just default to table? Since column counts are rare
|
||||
])
|
||||
]);
|
||||
|
||||
// reviews: id, movie_id, rating
|
||||
// movies: id, title, release_year
|
||||
// SELECT movies.release_year, COUNT(reviews.*) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3 AND movies.title LIKE '%Movie%' GROUP BY movies.release_year;
|
||||
// Differences from query below:
|
||||
// 1) GROUP BY movies.release_year instead of primary key,
|
||||
// 2) SELECT movies.release_year instead of movies.*, as per collapseBy semantics
|
||||
// 3) Additional WHERE clause for movies.title
|
||||
query = select("movies", [
|
||||
where({ title: contains("Movie") }),
|
||||
collapseBy("release_year", [
|
||||
compute({
|
||||
positive_review_count: count(has("reviews.movie_id", [
|
||||
{ rating: moreThan(3) }
|
||||
]))
|
||||
}),
|
||||
])
|
||||
]);
|
||||
|
||||
/*
|
||||
SELECT
|
||||
movies.*,
|
||||
COUNT(reviews.*) AS positive_review_count
|
||||
FROM movies
|
||||
JOIN
|
||||
reviews ON movies.id = reviews.movie_id
|
||||
WHERE reviews.rating > 3
|
||||
GROUP BY movies.id;
|
||||
*/
|
||||
// NOTE: The GROUP BY is necessary because otherwise COUNT (which is an aggregate function) cannot be used; the field name to use there, however, cannot be statelessly determined as it requires information about the primary key of the table
|
||||
// SELECT movies.*, COUNT(reviews.*) OVER (PARTITION BY reviews.movie_id) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3;
|
||||
// ^ Produces the correct data, but duplicates rows; window functions may be useful for combining aggregated information and direct rows, though
|
||||
// SELECT DISTINCT movies.*, COUNT(reviews.*) OVER (PARTITION BY reviews.movie_id) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3;
|
||||
// ^ Works! But may be slower? As it has to actually compare the resultset data, as opposed to eliminating rows by their `id`
|
||||
query = select("movies", [
|
||||
compute({ // better name for withDerived?
|
||||
positive_review_count: count(has("reviews.movie_id", [
|
||||
{ rating: moreThan(3) }
|
||||
]))
|
||||
})
|
||||
]);
|
||||
|
||||
// subqueries and VALUES (virtual tables)
|
||||
// FIXME: subqueries can result in scalar values, somehow, in some cases? Need to look into the details of this
|
||||
query = select("films", [
|
||||
defineAs("archived_rental_rates", select("films", [
|
||||
compute({ average: averageOf("rental_rate") }),
|
||||
where({ archived: true })
|
||||
])),
|
||||
// Alternatively?
|
||||
defineAs("rental_rates.average", averageOf("rental_rate")),
|
||||
where({ rental_rate: moreThan(foreignColumn("archived_rental_rates.average")) })
|
||||
]);
|
||||
|
||||
let virtualCustomersTable = virtualTable([{
|
||||
first_name: "anne",
|
||||
last_name: "smith",
|
||||
age: 42
|
||||
}, {
|
||||
first_name: "bob",
|
||||
last_name: "jones",
|
||||
age: 28
|
||||
}, {
|
||||
first_name: "joe",
|
||||
last_name: "blow",
|
||||
age: 49
|
||||
}]);
|
||||
|
||||
query = select(virtualCustomersTable, [
|
||||
where({ first_name: "anne" })
|
||||
]);
|
||||
|
||||
query = select("customers", [
|
||||
defineAs("target_customers", virtualCustomersTable),
|
||||
// withQueryAs("average_age", select("target_customers", [
|
||||
// compute({ average: averageOf("age") })
|
||||
// ])),
|
||||
// NOTE: The below would need to be translated into an aggregate subquery that is then referenced from the parent query; this case is identified by an aggregate function being called with a *foreign* column name. For this, it may need to additionally insert data into the AST that signals to an optimizer that a subquery needs to be produced somewhere further up the AST
|
||||
// FIXME: Ambiguity between "foreign column in a JOIN" and "foreign column referring to a virtual subquery table"; especially in the case where combine and defineAs are both used, and the scoping in the raqb might differ from that in the SQL query? Though it probably isn't allowed to redefine names like that anyway?
|
||||
where({ age: moreThan(averageOf("target_customers.age")) })
|
||||
]);
|
||||
|
||||
// Subquery used as a value list: selects rows of "fdt" whose `c1` matches any `c3`
// taken from "t2", where the subquery refers back to the outer row via `fdt.c1`.
// The column restriction is spelled `onlyColumns`, as everywhere else in these sketches.
query = select("fdt", [
	where({
		c1: anyOf(listFrom(select("t2", [
			onlyColumns([ "c3" ]),
			where({ c2: add([
				foreignColumn("fdt.c1"),
				10
			]) })
		])))
	})
]);
|
||||
|
||||
query = select("table1", [
|
||||
where({
|
||||
column1: anyOf(listFrom("table2", "column3"), [
|
||||
where({ column2: add([ 10, foreignColumn("table1.column1") ]) })
|
||||
])
|
||||
})
|
||||
]);
|
||||
|
||||
// https://docs.actian.com/actianx/11.1/index.html#page/SQLRef/Scalar_Subqueries.htm
|
||||
// NOTE: `compute` assumes the value side to implicitly be a possiblyForeignColumnName. Literal values must be explicit.
|
||||
// NOTE: There is a second form of `compute` that only produces a single, scalar value.
|
||||
let highestSalary = valueFrom("employees", compute(highestOf("salary")));
|
||||
let averageCommission = valueFrom("commissions", compute(averageOf("bonus")));
|
||||
|
||||
query = select("employees", [
|
||||
define("department", belongsTo("department_id")),
|
||||
where({ department: { name: "finance" } }),
|
||||
compute({
|
||||
employee_name: "emp_name",
|
||||
department_name: "department.name",
|
||||
average_commission: averageCommission,
|
||||
highest_salary: highestSalary
|
||||
})
|
||||
]);
|
||||
|
||||
// More complex version of the above, comparing only salaries in Finance
|
||||
let financeEmployees = select("employees", [
|
||||
define("department", belongsTo("department_id")),
|
||||
where({ department: { name: "finance" } })
|
||||
]);
|
||||
|
||||
query = select("employees", [
|
||||
define("department", belongsTo("department_id")),
|
||||
define("commissions", select("commissions")),
|
||||
define("finance_employees", financeEmployees),
|
||||
where({ department: { name: "finance" } }),
|
||||
compute({
|
||||
employee_name: "emp_name",
|
||||
department_name: "department.name",
|
||||
average_commission: averageOf("commissions.bonus"),
|
||||
average_salary: averageOf("finance_employees.salary"),
|
||||
highest_salary: highestOf("finance_employees.salary")
|
||||
})
|
||||
]);
|
||||
|
||||
query = select("sales", [
|
||||
collapseBy([ "product_id", hierarchical([ "country_id", "city_id", "store_id" ]) ], [
|
||||
compute({
|
||||
total_sold: count(),
|
||||
total_revenue: sum("revenue")
|
||||
})
|
||||
])
|
||||
]);
|
||||
|
||||
// Or, when the data is in normalized form:
|
||||
query = select("sales", [
|
||||
define({
|
||||
country: belongsTo("city.country_id"),
|
||||
city: belongsTo("store.city_id"),
|
||||
store: belongsTo("store_id")
|
||||
}),
|
||||
collapseBy([ "product_id", hierarchical([ "country.id", "city.id", "store_id" ]) ], [
|
||||
compute({
|
||||
total_sold: count(),
|
||||
total_revenue: sum("revenue")
|
||||
})
|
||||
])
|
||||
]);
|
||||
|
||||
// Or, inverted:
|
||||
query = select("countries", [
|
||||
define({
|
||||
city: has("cities.country_id"),
|
||||
store: has("stores.city_id"),
|
||||
sales: has("sales.store_id")
|
||||
}),
|
||||
collapseBy([ "sales.product_id", hierarchical([ "id", "city.id", "store_id" ]) ], [
|
||||
compute({
|
||||
total_sold: count("sales"),
|
||||
total_revenue: sum("sales.revenue")
|
||||
})
|
||||
])
|
||||
]);
|
||||
|
||||
// Multiple collapseBy clauses:
|
||||
// SELECT color, size, (CASE WHEN size IS NOT NULL THEN SUM(price) ELSE COUNT(*) END) AS total_sales FROM sales GROUP BY GROUPING SETS ( color, (color, size) );
|
||||
// This introduces a lot of query analysis complexity, let's not implement it unless someone actually needs it...
|
||||
query = select("sales", [
|
||||
collapseBy("color", [
|
||||
compute({ total_sales: count() })
|
||||
]),
|
||||
collapseBy(permutationsOf([ "color", "size" ]), [
|
||||
compute({ total_sales: sum("price") })
|
||||
])
|
||||
]);
|
||||
|
||||
// FIXME: count
|
||||
|
||||
// FIXME: Special case, handling clauses that cannot exist within an ON, eg. ORDER BY
|
||||
// FIXME: Test duplicate fields, in different WHEREs, eg. number_three
|
||||
// NOTE: Need to use table aliases for self-combines/self-relations
|
||||
// FIXME: cartesianProduct(...) wrapper for combine. in addition to anyOf/allOf
|
||||
// FIXME: startsWith -> LIKE FOO%, endsWith -> LIKE %FOO, includes -> LIKE %FOO% -- figure out input escaping here!
|
||||
// FIXME: does LIKE work with ANY/ALL?
|
||||
// FIXME: Performance-class optimizer that moves non-aggregate conditions out of the collapseBy conditions and into the parent select
|
||||
// FIXME: Document that collapseBy will override the selected columns, and disallow the usage of addColumns/onlyColumns (and why that is)
|
||||
// FIXME: Actually implement that limitation, too!
|
||||
// FIXME: Is there an 'array' aggregate function for when someone wants all of the unique items within a group, or something like that?
|
||||
// FIXME: optionalArrayOf combinator, and permit omitting the array in all cases where an array is expected and an array of 1 item would be valid
|
||||
// In practice this means permitting omitting the array (and implicitly wrapping it) anywhere, since it will then hit the regular item count check anyway.
|
||||
// FIXME: Disallow the use of both compute and collapseBy within a `select`; require the compute to be inside the collapseBy, in that case
|
||||
// FIXME: Table functions in place of a table name
|
||||
// FIXME: "create function" and "create view" interfaces
|
||||
// NOTE: Ensure that no matter where column references are provided, local column references *always* refer to the currently-containing context
|
||||
// Need to go through all the possible combinations of constructs, and verify that this holds true, also for eg. sub-queries
|
||||
// NOTE: Sometimes subqueries should be defined using WITH rather than inline or in a FROM, eg. if they do not appear in the output through a JOIN
|
||||
// FIXME: IN ... clauses can accept subqueries, is this also true for ANY/ALL?
|
||||
// NOTE: Subqueries in WHERE clauses can reference the outer query (lexical scoping), ie. the item being evaluated! Make sure that this is represented well in the API design.
|
||||
// FIXME: Have an 'optimizer' (new 'correctness' category?) that verifies that all foreign column references are semantically valid inside of a given query? Are there cases where some sort of predefined table can be accessed without defining it in the query? If yes, it should probably be required to define this upfront in raqb queries.
|
||||
// FIXME: Figure out how ordering relates to JOINs
|
||||
// FIXME: JSON queries, via jsonQuery({ ... }) wrapper or so
|
||||
// FIXME: Implement nested WHERE for foreign constraints
|
||||
// FIXME: Bulk inserts! Should make sure that VALUES (which is probably needed for this) does not require a parameter for every single individual value
|
||||
// FIXME: Figure out a solution for update/delete with LIMIT
|
||||
// FIXME: initializeWith for column additions in schema (create with nullable -> update -> set non-nullable)
|
||||
// This is different from defaultTo(...), which continues to be applicable *after* creating the column
|
||||
// FIXME: Non-parameterizable placeholder representation, for dealing with things like currently-unknown primary keys, that will only be known upon query execution (as the executor knows the table structure)
|
||||
// FIXME: Replace aliases with `compute`
|
||||
|
||||
query = createTable("films", {
|
||||
fields: {
|
||||
id: primaryKey(),
|
||||
title: text(),
|
||||
description: [ text(), canBeNull() ],
|
||||
|
||||
},
|
||||
indexes: [
|
||||
compositeIndex([ "data_source", "data_source_id" ])
|
||||
]
|
||||
});
|
||||
|
||||
combine(/* [ clause ], allOf([ clause ]), anyOf([ clause]) */);
|
||||
|
||||
|
||||
|
||||
// let niceNumbers = anyOf([ 1, 2, 3 ]);
|
||||
|
||||
// query = select("projects", [
|
||||
// where({
|
||||
// number_one: niceNumbers,
|
||||
// number_two: niceNumbers
|
||||
// }),
|
||||
// where({
|
||||
// number_three: anyOf([ 42, column("number_one") ]),
|
||||
// number_four: 1337
|
||||
// })
|
||||
// ]);
|
||||
|
||||
|
||||
// query = select("projects", [
|
||||
// where({
|
||||
// // foo: anyOf([ "bar", not(not("baz")), anyOf([ "bar2", "baz2" ]), unsafeSQL("TRUE") ]),
|
||||
// // qux: anyOf([ 13, moreThan(42) ]),
|
||||
// complex: anyOf([
|
||||
// 30,
|
||||
// 40,
|
||||
// allOf([
|
||||
// moreThan(100),
|
||||
// lessThan(200),
|
||||
// lessThan(parameter("max"))
|
||||
// ])
|
||||
// ])
|
||||
// }),
|
||||
// // where({ second: 2 }),
|
||||
// // where(not({
|
||||
// // thirdA: 3,
|
||||
// // thirdB: 3
|
||||
// // })),
|
||||
// // where(anyOf([ { foo: "bar" } ]))
|
||||
// ]);
|
||||
|
||||
// query = select("projects", [
|
||||
// onlyColumns([
|
||||
// "foo",
|
||||
// alias("bar", 42),
|
||||
// alias("baz", sql("foo"))
|
||||
// ]),
|
||||
// where(anyOf([
|
||||
// { foo: "bar", qux: anyOf([ "quz", "quy" ]) },
|
||||
// { baz: lessThan(42) }
|
||||
// ]))
|
||||
// ]);
|
||||
|
||||
/* {
|
||||
query: 'SELECT foo, ? AS bar, foo AS baz FROM projects WHERE foo = ? OR baz < ?;',
|
||||
params: [ 42, 'bar', 42 ],
|
||||
placeholders: []
|
||||
} */
|
||||
|
||||
|
||||
|
||||
// query = select("projects", [
|
||||
// onlyColumns([ "id", "name" ]),
|
||||
// where({
|
||||
// active: true,
|
||||
// visible: true,
|
||||
// // primary_category_id: anyOf([ 2, 3, 5, 7, 8 ])
|
||||
// // primary_category_id: anyOf(parameter("categoryIDs"))
|
||||
// primary_category_id: not(anyOf(parameter("categoryIDs"))) // FIXME/MARKER: This gets stringified wrong!
|
||||
// }),
|
||||
// // FIXME: where pivot table entry exists for category in that list
|
||||
// withRelations({
|
||||
// primaryCategory: belongsTo("primary_category_id", { query: [ withOwner() ] }),
|
||||
// categories: through([
|
||||
// has("projects_categories.project_id", { query: [
|
||||
// // Optional extra clauses for the query on the pivot table, eg. for filtering entries
|
||||
// where({ adminApproved: true })
|
||||
// ]}),
|
||||
// "category_id"
|
||||
// ]),
|
||||
|
||||
// // all user groups for a given project ID -> all memberships for the given user group IDs -> for each membership, the record referenced by the given user_id
|
||||
// users: through([ "user_groups.project_id", "membership.user_group_id", "user_id" ]),
|
||||
// // ... expands to ...
|
||||
// // users: through([
|
||||
// // has({ column: foreignColumn({ table: "user_groups", column: "project_id" }) }),
|
||||
// // has({ column: foreignColumn({ table: "memberships", column: "user_group_id" }) }),
|
||||
// // belongsTo({ column: column("user_id") }),
|
||||
// // ]),
|
||||
|
||||
// owner: "owner_id",
|
||||
// // ... expands to
|
||||
// // owner: belongsTo({ column: "owner_id" }),
|
||||
|
||||
// releases: "releases.project_id",
|
||||
// // ... expands to ...
|
||||
// // releases: has({ column: "releases.project_id" })
|
||||
// }),
|
||||
// withDerived({
|
||||
// capitalized_name: sql("UPPER(name)"),
|
||||
// team_count: sql("moderator_count + admin_count"),
|
||||
// // fourty_two: value(42), // This makes no sense in withDerived!
|
||||
// name_distance: (project) => wordDistanceAlgorithm(project.name, "someReferenceName") // NOTE: This could have returned a Promise!
|
||||
// }),
|
||||
// mapCase({ from: "snake", to: "camel" })
|
||||
// ]);
|
@ -0,0 +1,20 @@
|
||||
"use strict";
|
||||
|
||||
module.exports = function combineOptimizers(optimizers) {
|
||||
let allVisitors = {};
|
||||
|
||||
for (let optimizer of optimizers) {
|
||||
for (let [ key, visitor ] of Object.entries(optimizer.visitors)) {
|
||||
if (allVisitors[key] == null) {
|
||||
allVisitors[key] = [];
|
||||
}
|
||||
|
||||
allVisitors[key].push({
|
||||
name: optimizer.name,
|
||||
func: visitor
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return allVisitors;
|
||||
};
|
@ -0,0 +1,16 @@
|
||||
"use strict";
|
||||
|
||||
const debug = require("debug");
|
||||
|
||||
module.exports = function createDebuggers(optimizers) {
|
||||
let debuggers = {};
|
||||
|
||||
for (let optimizer of optimizers) {
|
||||
debuggers[optimizer.name] = debug(`raqb:ast:optimize:${optimizer.name}`);
|
||||
debuggers[`${optimizer.name} (deferred)`] = debug(`raqb:ast:optimize:${optimizer.name} (deferred)`);
|
||||
}
|
||||
|
||||
debuggers["(subtree change)"] = debug(`raqb:ast:optimize:(subtree change)`);
|
||||
|
||||
return debuggers;
|
||||
};
|
@ -0,0 +1,27 @@
|
||||
"use strict";
|
||||
|
||||
module.exports = function createHandlerTracker() {
|
||||
let handlers = new Map();
|
||||
|
||||
return {
|
||||
add: function (name, func) {
|
||||
if (!handlers.has(name)) {
|
||||
handlers.set(name, []);
|
||||
}
|
||||
|
||||
handlers.get(name).push(func);
|
||||
},
|
||||
call: function (name, value) {
|
||||
let funcs = handlers.get(name);
|
||||
|
||||
if (funcs != null) {
|
||||
for (let func of funcs) {
|
||||
func(value);
|
||||
}
|
||||
}
|
||||
},
|
||||
has: function (name) {
|
||||
return handlers.has(name);
|
||||
}
|
||||
};
|
||||
};
|
@ -0,0 +1,12 @@
|
||||
"use strict";
|
||||
|
||||
module.exports = function createTimings(optimizers) {
|
||||
let timings = {};
|
||||
|
||||
for (let optimizer of optimizers) {
|
||||
// timings[optimizer.name] = 0n;
|
||||
timings[optimizer.name] = 0;
|
||||
}
|
||||
|
||||
return timings;
|
||||
};
|
@ -1,5 +1,11 @@
|
||||
"use strict";
|
||||
|
||||
module.exports = function concat(arrays) {
|
||||
return arrays[0].concat(... arrays.slice(1));
|
||||
if (arrays.length === 0) {
|
||||
return [];
|
||||
} else if (arrays.length === 1) {
|
||||
return arrays[0];
|
||||
} else {
|
||||
return arrays[0].concat(... arrays.slice(1));
|
||||
}
|
||||
};
|
||||
|
@ -0,0 +1,8 @@
|
||||
"use strict";
|
||||
|
||||
const merge = require("./merge");
|
||||
const node = require("./ast-node");
|
||||
|
||||
module.exports = function deriveNode(source, newProperties) {
|
||||
return node(merge(source, newProperties));
|
||||
};
|
@ -0,0 +1,11 @@
|
||||
"use strict";
|
||||
|
||||
module.exports = function findLastIndex(array, predicate) {
|
||||
for (let i = array.length - 1; i >= 0; i--) {
|
||||
if (predicate(array[i])) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
};
|
@ -0,0 +1,28 @@
|
||||
"use strict";
|
||||
|
||||
const { validateArguments } = require("@validatem/core");
|
||||
const required = require("@validatem/required");
|
||||
const arrayOf = require("@validatem/array-of");
|
||||
|
||||
const node = require("../ast-node");
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isCollapsibleColumn = require("../validators/operations/is-collapsible-column")(operations);
|
||||
const isCollapseByClause = require("../validators/operations/is-collapse-by-clause")(operations);
|
||||
|
||||
return function collapseBy(_columns) {
|
||||
let [ columns, clauses ] = validateArguments(arguments, {
|
||||
columns: [ required, arrayOf([ required, isCollapsibleColumn ]) ],
|
||||
clauses: [ arrayOf([ isCollapseByClause ]) ]
|
||||
});
|
||||
|
||||
return node({
|
||||
type: "collapseBy",
|
||||
columns: node({
|
||||
type: "collapseByColumns",
|
||||
columns: columns
|
||||
}),
|
||||
clauses: clauses
|
||||
});
|
||||
};
|
||||
};
|
@ -0,0 +1,47 @@
|
||||
"use strict";
|
||||
|
||||
const { validateArguments } = require("@validatem/core");
|
||||
const either = require("@validatem/either");
|
||||
const required = require("@validatem/required");
|
||||
const anyProperty = require("@validatem/any-property");
|
||||
const isString = require("@validatem/is-string");
|
||||
|
||||
const unreachable = require("../unreachable");
|
||||
const node = require("../ast-node");
|
||||
const tagAsType = require("../validators/tag-as-type");
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isComputable = require("../validators/operations/is-computable")(operations);
|
||||
|
||||
return function compute(_items) {
|
||||
let [ items ] = validateArguments(arguments, {
|
||||
// FIXME: Add support for scalar compute
|
||||
// items: [ required, either([
|
||||
// [ isComputable, tagAsType("single") ],
|
||||
// [ anyProperty({
|
||||
// key: [ required, isString ],
|
||||
// value: [ required, isComputable ]
|
||||
// }), tagAsType("multiple") ]
|
||||
// ]) ]
|
||||
items: [ required, anyProperty({
|
||||
key: [ required, isString ],
|
||||
value: [ required, isComputable ]
|
||||
}), tagAsType("multiple") ]
|
||||
});
|
||||
|
||||
if (items.type === "multiple") {
|
||||
return node({
|
||||
type: "computeMultiple",
|
||||
items: Object.entries(items.value).map(([ key, value ]) => {
|
||||
return node({
|
||||
type: "compute",
|
||||
column: operations.column(key),
|
||||
expression: value
|
||||
});
|
||||
})
|
||||
});
|
||||
} else {
|
||||
unreachable(`Invalid tagged type '${items.type}'`);
|
||||
}
|
||||
};
|
||||
};
|
@ -0,0 +1,27 @@
|
||||
"use strict";
|
||||
|
||||
const { validateArguments } = require("@validatem/core");
|
||||
const required = require("@validatem/required");
|
||||
|
||||
const node = require("../ast-node");
|
||||
|
||||
module.exports = function count(operations) {
|
||||
const isTable = require("../validators/operations/is-table")(operations);
|
||||
|
||||
return function count(_table) {
|
||||
let [ table ] = validateArguments(arguments, {
|
||||
table: [ isTable ]
|
||||
});
|
||||
|
||||
// TODO: Investigate whether this can be made more performant by counting a specific column rather than *. That would probably require stateful knowledge of the database schema, though.
|
||||
let columnReference = (table != null)
|
||||
? operations.foreignColumn({ table: table, column: "*" }) // FIXME: Make sure not to break this (internally) when making column name checks more strict
|
||||
: operations.column("*");
|
||||
|
||||
return node({
|
||||
type: "aggregrateFunction",
|
||||
functionName: "count",
|
||||
args: [ columnReference ]
|
||||
});
|
||||
};
|
||||
};
|
@ -0,0 +1,22 @@
|
||||
"use strict";
|
||||
|
||||
const { validateArguments } = require("@validatem/core");
|
||||
const required = require("@validatem/required");
|
||||
const arrayOf = require("@validatem/array-of");
|
||||
|
||||
const node = require("../ast-node");
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isPossiblyForeignColumn = require("../validators/operations/is-possibly-foreign-column")(operations);
|
||||
|
||||
return function hierarchical(_columns) {
|
||||
let [ columns ] = validateArguments(arguments, {
|
||||
columns: [ required, arrayOf([ required, isPossiblyForeignColumn ]) ] // FIXME: Require minimum 2, probably
|
||||
});
|
||||
|
||||
return node({
|
||||
type: "hierarchical",
|
||||
columns: columns
|
||||
});
|
||||
};
|
||||
};
|
@ -0,0 +1,22 @@
|
||||
"use strict";
|
||||
|
||||
const { validateArguments } = require("@validatem/core");
|
||||
const required = require("@validatem/required");
|
||||
|
||||
const node = require("../ast-node");
|
||||
|
||||
module.exports = function count(operations) {
|
||||
const isPossiblyForeignColumn = require("../validators/operations/is-possibly-foreign-column")(operations);
|
||||
|
||||
return function sum(_column) {
|
||||
let [ column ] = validateArguments(arguments, {
|
||||
column: [ required, isPossiblyForeignColumn ]
|
||||
});
|
||||
|
||||
return node({
|
||||
type: "aggregrateFunction",
|
||||
functionName: "sum",
|
||||
args: [ column ]
|
||||
});
|
||||
};
|
||||
};
|
@ -0,0 +1,157 @@
|
||||
"use strict";
|
||||
|
||||
const NoChange = require("./util/no-change");
|
||||
const deriveNode = require("../derive-node");
|
||||
const operations = require("../operations");
|
||||
const typeOf = require("../type-of");
|
||||
const unreachable = require("../unreachable");
|
||||
const concat = require("../concat");
|
||||
|
||||
const uniqueByPredicate = require("../unique-by-predicate");
|
||||
|
||||
// FIXME: Support for foreign column names
|
||||
|
||||
// Removes repeated selections of the same plain column from a list of selected
// column nodes, keeping the first occurrence of each.
//
// Only `columnName` nodes are de-duplicated (by name). Every other node type, such
// as an alias, has no `name` of its own; it is keyed on the node object itself so
// that it always stays in the list and still gets validated by the caller.
function uniqueColumns(columns) {
	return uniqueByPredicate(columns, (column) => {
		if (typeOf(column) === "columnName") {
			return column.name;
		} else {
			return column;
		}
	});
}
|
||||
|
||||
/*
|
||||
valid columns when collapsing:
|
||||
- columns that appear in the collapseBy column list, within or without a hierarchical wrapper
|
||||
- any column that is wrapped in an aggregate function of some sort
|
||||
*/
|
||||
|
||||
module.exports = {
|
||||
name: "set-collapse-by-columns",
|
||||
category: [ "normalization" ],
|
||||
visitors: {
|
||||
collapseBy: (node, { setState }) => {
|
||||
setState("isCollapsing", true);
|
||||
return NoChange;
|
||||
},
|
||||
columnName: (node, { setState }) => {
|
||||
setState("columnSeen", node);
|
||||
return NoChange;
|
||||
},
|
||||
// FIXME: Think of a generic way to express "only match columns under this specific child property"
|
||||
collapseByColumns: (node, { registerStateHandler, setState, defer }) => {
|
||||
let columns = [];
|
||||
|
||||
registerStateHandler("columnSeen", (node) => {
|
||||
columns.push(node);
|
||||
});
|
||||
|
||||
return defer(() => {
|
||||
setState("setCollapsedColumns", columns);
|
||||
return NoChange;
|
||||
});
|
||||
},
|
||||
addColumns: (node, { setState }) => {
|
||||
setState("setAddColumns", node.columns);
|
||||
return NoChange;
|
||||
},
|
||||
onlyColumns: (node, { setState }) => {
|
||||
setState("setOnlyColumns", node.columns);
|
||||
return NoChange;
|
||||
},
|
||||
aggregrateFunction: (node, { registerStateHandler }) => {
|
||||
// FIXME: Also report isCollapsing here, due to aggregrate function use, but make sure that the error describes this as the (possible) cause
|
||||
return NoChange;
|
||||
},
|
||||
compute: (node, { setState }) => {
|
||||
setState("computeSeen", node);
|
||||
return NoChange;
|
||||
},
|
||||
select: (node, { registerStateHandler, defer }) => {
|
||||
let isCollapsing;
|
||||
let onlyColumns = [];
|
||||
let addColumns = [];
|
||||
let computes = [];
|
||||
let collapsedColumns;
|
||||
|
||||
registerStateHandler("isCollapsing", (value) => {
|
||||
isCollapsing = isCollapsing || value;
|
||||
});
|
||||
|
||||
registerStateHandler("setCollapsedColumns", (columns) => {
|
||||
if (collapsedColumns == null) {
|
||||
collapsedColumns = columns;
|
||||
} else {
|
||||
throw new Error(`You can currently only specify a single 'collapseBy' clause. Please file an issue if you have a reason to need more than one!`);
|
||||
}
|
||||
});
|
||||
|
||||
registerStateHandler("setOnlyColumns", (columns) => {
|
||||
onlyColumns = onlyColumns.concat(columns);
|
||||
});
|
||||
|
||||
registerStateHandler("setAddColumns", (columns) => {
|
||||
addColumns = addColumns.concat(columns);
|
||||
});
|
||||
|
||||
registerStateHandler("computeSeen", (node) => {
|
||||
computes.push(node);
|
||||
});
|
||||
|
||||
return defer(() => {
|
||||
if (isCollapsing) {
|
||||
if (addColumns.length > 0) {
|
||||
let extraColumnNames = addColumns.map((column) => column.name);
|
||||
|
||||
throw new Error(`You tried to add extra columns (${extraColumnNames.join(", ")}) in your query, but this is not possible when using collapseBy. See [FIXME: link] for more information, and how to solve this.`);
|
||||
} else if (onlyColumns.length > 0) {
|
||||
// NOTE: This can happen either because the user specified an onlyColumns clause, *or* because a previous run of this optimizer did so!
|
||||
let uniqueSelectedColumns = uniqueColumns(onlyColumns);
|
||||
let collapsedColumnNames = collapsedColumns.map((column) => column.name);
|
||||
|
||||
let invalidColumnSelection = uniqueSelectedColumns.filter((node) => {
|
||||
let isAggregrateComputation = typeOf(node) === "alias" && typeOf(node.expression) === "aggregrateFunction";
|
||||
let isCollapsedColumn = typeOf(node) === "columnName" && collapsedColumnNames.includes(node.name);
|
||||
|
||||
let isValid = isAggregrateComputation || isCollapsedColumn;
|
||||
|
||||
return !isValid;
|
||||
});
|
||||
|
||||
// FIXME: We can probably optimize this by marking the optimizer-created onlyColumns as inherently-valid, via some sort of node metadata mechanism
|
||||
|
||||
if (invalidColumnSelection.length > 0) {
|
||||
let invalidColumnNames = invalidColumnSelection.map((column) => {
|
||||
let columnType = typeOf(column);
|
||||
|
||||
if (columnType === "columnName") {
|
||||
return column.name;
|
||||
} else if (columnType === "alias") {
|
||||
// FIXME: Show alias target instead of column name here?
|
||||
return column.column.name;
|
||||
} else {
|
||||
return unreachable(`Encountered '${columnType}' node in invalid columns`);
|
||||
}
|
||||
});
|
||||
|
||||
throw new Error(`You tried to include one or more columns in your query (${invalidColumnNames.join(", ")}), that are not used in a collapseBy clause or aggregrate function. See [FIXME: link] for more information.`);
|
||||
} else {
|
||||
return NoChange;
|
||||
}
|
||||
} else {
|
||||
let computeAliases = computes.map((node) => {
|
||||
return operations.alias(node.column, node.expression);
|
||||
});
|
||||
|
||||
return deriveNode(node, {
|
||||
clauses: node.clauses.concat([
|
||||
operations.onlyColumns(concat([
|
||||
collapsedColumns,
|
||||
computeAliases
|
||||
]))
|
||||
])
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// FIXME: a ConsumeNode marker, like RemoveNode but it does not invalidate that node's state... may need to actually make it a reference, so that a parent node can decide whether to consume that node. Basically passing a "consume this node" function as a state value, that correctly internally triggers the optimizer infrastructure to change the tree as a result.
|
||||
// FIXME: Consume the compute nodes, and have an optimizer that removes empty computeMultiple nodes
|
@ -0,0 +1,85 @@
|
||||
"use strict";
|
||||
|
||||
const NoChange = require("./util/no-change");
|
||||
const deriveNode = require("../derive-node");
|
||||
const operations = require("../operations");
|
||||
|
||||
module.exports = {
|
||||
name: "test-context",
|
||||
category: [ "testing" ],
|
||||
visitors: {
|
||||
columnName: (node, { setState }) => {
|
||||
setState("seenColumn", node.name);
|
||||
|
||||
return NoChange;
|
||||
},
|
||||
select: (node, { registerStateHandler, defer }) => {
|
||||
let seenColumns = new Set();
|
||||
|
||||
registerStateHandler("seenColumnsInWhere", (names) => {
|
||||
for (let name of names) {
|
||||
seenColumns = seenColumns.add(name);
|
||||
}
|
||||
});
|
||||
|
||||
// FIXME: Definitely need better AST modification/derivation tools... probably some sort of deep-modifying utility, for starters. Maybe merge-by-template can be of use here? With a custom AST node merger? It probably doesn't support non-enumerable properties correctly right now, though...
|
||||
|
||||
return defer(() => {
|
||||
console.log("Seen columns in WHERE in SELECT:", seenColumns);
|
||||
|
||||
let onlyColumnsClause = node.clauses.find((clause) => clause.type === "onlyColumns");
|
||||
|
||||
let columnsAlreadyAdded = onlyColumnsClause != null && Array.from(seenColumns).every((column) => {
|
||||
return onlyColumnsClause.columns.some((existingColumn) => existingColumn.name === column);
|
||||
});
|
||||
|
||||
if (!columnsAlreadyAdded) {
|
||||
// NOTE: This is a good test case for optimizer stability! Just returning a derived node in every case.
|
||||
let newOnlyColumnsClause = (onlyColumnsClause == null)
|
||||
? operations.onlyColumns(Array.from(seenColumns))
|
||||
: deriveNode(onlyColumnsClause, {
|
||||
columns: onlyColumnsClause.columns.concat(Array.from(seenColumns).map((columnName) => {
|
||||
return operations.column(columnName);
|
||||
}))
|
||||
});
|
||||
|
||||
return deriveNode(node, {
|
||||
clauses: node.clauses
|
||||
.filter((clause) => clause.type !== "onlyColumns")
|
||||
.concat([ newOnlyColumnsClause ])
|
||||
});
|
||||
} else {
|
||||
return NoChange;
|
||||
}
|
||||
});
|
||||
},
|
||||
where: (node, { registerStateHandler, defer, setState }) => {
|
||||
let seenColumns = [];
|
||||
|
||||
registerStateHandler("seenColumn", (name) => seenColumns.push(name));
|
||||
|
||||
return defer(() => {
|
||||
setState("seenColumnsInWhere", seenColumns);
|
||||
return NoChange;
|
||||
});
|
||||
|
||||
// let seenColumns = [];
|
||||
// let id = Math.random();
|
||||
|
||||
// registerStateHandler("seenColumn", (name) => {
|
||||
// seenColumns.push(name);
|
||||
// });
|
||||
|
||||
// console.log("Scheduling defer", id);
|
||||
|
||||
// return defer(() => {
|
||||
// console.log("Defer called", id);
|
||||
|
||||
// // MARKER: This gets called twice, but should only be called once!
|
||||
// // console.log("Seen columns in WHERE:", seenColumns, require("util").inspect(node, {colors:true,depth:null}));
|
||||
// console.log("Seen columns in WHERE:", seenColumns);
|
||||
// return NoChange;
|
||||
// });
|
||||
}
|
||||
}
|
||||
};
|
@ -0,0 +1,15 @@
|
||||
"use strict";
|
||||
|
||||
module.exports = function uniqueByPredicate(items, predicate) {
|
||||
let seen = new Set();
|
||||
|
||||
return items.filter((item) => {
|
||||
let key = predicate(item);
|
||||
|
||||
if (seen.has(key)) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
});
|
||||
};
|
@ -0,0 +1,7 @@
|
||||
"use strict";
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isObjectType = require("./is-object-type")(operations);
|
||||
|
||||
return isObjectType("aggregrateFunction");
|
||||
};
|
@ -0,0 +1,14 @@
|
||||
"use strict";
|
||||
|
||||
const either = require("@validatem/either");
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isObjectType = require("./is-object-type")(operations);
|
||||
const isComputeClause = require("./is-compute-clause")(operations);
|
||||
|
||||
return either([
|
||||
[ isObjectType("where") ],
|
||||
[ isComputeClause ],
|
||||
// [ isObjectType("withRelations") ], // FIXME: Implement support for this
|
||||
]);
|
||||
};
|
@ -0,0 +1,13 @@
|
||||
"use strict";
|
||||
|
||||
const either = require("@validatem/either");
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isObjectType = require("./is-object-type")(operations);
|
||||
const isPossiblyForeignColumn = require("./is-possibly-foreign-column")(operations);
|
||||
|
||||
return either([
|
||||
isPossiblyForeignColumn,
|
||||
isObjectType("hierarchical")
|
||||
]);
|
||||
};
|
@ -0,0 +1,16 @@
|
||||
"use strict";
|
||||
|
||||
const either = require("@validatem/either");
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isObjectType = require("./is-object-type")(operations);
|
||||
const isPossiblyForeignColumn = require("./is-possibly-foreign-column")(operations);
|
||||
|
||||
return either([
|
||||
isObjectType("sqlExpression"),
|
||||
isObjectType("aggregrateFunction"),
|
||||
isObjectType("valueFrom"),
|
||||
isObjectType("literalValue"),
|
||||
isPossiblyForeignColumn
|
||||
]);
|
||||
};
|
@ -0,0 +1,12 @@
|
||||
"use strict";
|
||||
|
||||
const either = require("@validatem/either");
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isObjectType = require("./is-object-type")(operations);
|
||||
|
||||
return either([
|
||||
isObjectType("computeMultiple"),
|
||||
isObjectType("computeSingle"),
|
||||
]);
|
||||
};
|
@ -0,0 +1,15 @@
|
||||
"use strict";
|
||||
|
||||
const wrapError = require("@validatem/wrap-error");
|
||||
const either = require("@validatem/either");
|
||||
const isString = require("@validatem/is-string");
|
||||
|
||||
module.exports = function (operations) {
|
||||
const isObjectType = require("./is-object-type")(operations);
|
||||
const wrapWithOperation = require("./wrap-with-operation")(operations);
|
||||
|
||||
return wrapError("Must be a table name or object", either([
|
||||
[ isObjectType("tableName") ],
|
||||
[ isString, wrapWithOperation("table") ]
|
||||
]));
|
||||
};
|
Loading…
Reference in New Issue