master
Sven Slootweg 4 years ago
parent 6985a0dc47
commit 8bbd02eec8

@ -38408,7 +38408,9 @@ function _typeof(obj) { "@babel/helpers - typeof"; if (typeof Symbol === "functi
module.exports = function typeOf(value) {
// FIXME: Better check
if (_typeof(value) === "object") {
if (value == null) {
return null;
} else if (_typeof(value) === "object") {
return value.type;
} else {
return null;
@ -39433,8 +39435,6 @@ function getInternalBasePath() {
function removeInternalFrames(stack) {
let internalBasePath = getInternalBasePath();
console.log(stack);
if (stack[0].location != null && stack[0].location.path != null && stack[0].location.path.startsWith(internalBasePath)) {
// We are running a normal environment with sensible stacktraces.

@ -0,0 +1,58 @@
"use strict";
const pickRandomWeighted = require("pick-random-weighted");
const syncpipe = require("syncpipe");
let columns = {
color: [ "blue", "black", "brown", "red", "yellow", "gray", "white", "pink", "purple" ],
size: [ "XS", "S", "M", "L", "XL", "XXL" ],
country_id: [ 1, 2, 3, 4, 5 ],
store_id: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
price: [ 2, 5.50, 12, 20, 22.50 ]
};
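// NOTE: A random-comparator sort is not an unbiased shuffle; that is assumed to be acceptable here, since this script only needs rough variation in the generated test data.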
function shuffle(arrayReference) {
arrayReference.sort(() => Math.random() - 0.5);
}
function shuffleAllColumns() {
for (let key of Object.keys(columns)) {
shuffle(columns[key]);
}
}
let printColumns = Object.keys(columns);
function printItem(item) {
console.log(printColumns.map((column) => item[column]).join(";"));
}
console.log(printColumns.join(";"));
for (let i = 0; i < 200; i++) {
shuffleAllColumns();
// We do this because we *don't* want an even distribution, to make the data more realistic.
let weightedColumns = Object.entries(columns).map(([ key, values ]) => {
return [
key,
values.map((value, i) => {
return [ value, i ];
})
];
});
for (let i = 0; i < 1000; i++) {
let data = syncpipe(weightedColumns, [
(_) => _.map(([ key, values ]) => {
return [
key,
pickRandomWeighted(values)
];
}),
(_) => Object.fromEntries(_)
]);
printItem(data);
}
}

@ -0,0 +1,254 @@
"use strict";
const util = require("util");
const syncpipe = require("syncpipe");
const debug = require("debug");
const defaultValue = require("default-value");
const mapObj = require("map-obj");
const NoChange = require("../../optimizers/util/no-change");
const RemoveNode = require("../../optimizers/util/remove-node");
const typeOf = require("../../type-of");
const unreachable = require("../../unreachable");
const measureTime = require("../../measure-time");
const concat = require("../../concat");
// FIXME: Consider deepcopying the tree once, and then mutating that tree, instead of doing everything immutably; this might be significantly faster when a few iterations are needed to stabilize the tree, as that might otherwise result in many copies of the subtree(s) leading up to the changed node(s), one for each iteration.
// FIXME: Consider whether inverting the evaluation order (deepest-first rather than shallowest-first) can remove the need for multiple optimization passes and stabilization detection.
// FIXME: Verify that changed nodes actually result in a change in where the walker goes!
function createDebuggers(optimizers) {
let debuggers = {};
for (let optimizer of optimizers) {
debuggers[optimizer.name] = debug(`raqb:ast:optimize:${optimizer.name}`);
}
return debuggers;
}
function createTimings(optimizers) {
let timings = {};
for (let optimizer of optimizers) {
// timings[optimizer.name] = 0n;
timings[optimizer.name] = 0;
}
return timings;
}
function combineOptimizers(optimizers) {
let allVisitors = {};
for (let optimizer of optimizers) {
for (let [ key, visitor ] of Object.entries(optimizer.visitors)) {
if (allVisitors[key] == null) {
allVisitors[key] = [];
}
allVisitors[key].push({
name: optimizer.name,
func: visitor
});
}
}
return allVisitors;
}
// FIXME: StopMatching marker to signal that eg. a generic visitor should no longer match after a specific one?
// FIXME: OriginalNode marker to explicitly indicate that any transformations applied by *other* visitors should be thrown out?
function defer(func) {
return { __type: "defer", func: func };
}
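// For illustration, an optimizer as consumed below is roughly shaped like this; the
// "someNodeType" visitor and its logic are made up for the example:
//
//   const exampleOptimizer = {
//       name: "example-optimizer",
//       visitors: {
//           someNodeType: (node, { setState, registerStateHandler, defer }) => {
//               if (node.items != null && node.items.length === 0) {
//                   return RemoveNode; // drop empty nodes from the tree entirely
//               } else {
//                   return NoChange; // leaving a node untouched must be signalled explicitly
//               }
//           }
//       }
//   };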
module.exports = function optimizeTree(ast, optimizers) {
// NOTE: Depth-first!
let visitors = combineOptimizers(optimizers);
let timings = createTimings(optimizers);
let debuggers = createDebuggers(optimizers);
// FIXME: Dirty tracking for stabilization detection
// NOTE: mapObj expects the callback to return a [ key, value ] pair.
let visitorsByType = mapObj(visitors, (key, value) => {
return [
key,
concat([
defaultValue(value, []),
defaultValue(visitors["*"], []),
])
];
});
function handle(node/*, parentStateHandlers*/) {
let deferFuncs = [];
// let stateHandlers = Object.create(parentStateHandlers);
let stateLog = [];
function registerStateHandler(name, func) {
stateHandlers[name] = function (value) {
// FIXME: test setParentState
return func(value, { setState: setParentState });
};
}
function setParentState(name, value) {
if (parentStateHandlers[name] != null) {
parentStateHandlers[name](value);
}
}
function setState(name, value) {
stateLog.push({ type: "set", name, value });
// if (stateHandlers[name] != null) {
// stateHandlers[name](value);
// }
}
function resetAllListeners() {
// Called whenever a node is invalidated (replaced, removed, ...) and therefore no longer has a reason to receive any kind of callbacks
deferFuncs = [];
// stateHandlers = Object.create(parentStateHandlers);
stateLog.push({ type: "reset" });
}
function applyVisitors(node) {
let nodeVisitors = visitorsByType[node.type];
/*
run each visitor on the node, until either
a) the node is invalidated (removed, substituted)
b) the list of visitors is finished
then process all children
then
*/
if (nodeVisitors == null) {
return node;
} else {
let lastNode = node;
for (let visitor of nodeVisitors) {
// eslint-disable-next-line no-loop-func
let { value: result, time } = measureTime(() => {
return visitor.func(lastNode, { registerStateHandler, setState, defer });
});
// FIXME: Re-evaluate children after presence in a new subtree? Is this necessary for making sure the context tree is correct?
timings[visitor.name] += time;
if (result === NoChange) {
// no-op
} else if (result == null) {
throw new Error(`A visitor is not allowed to return null or undefined; if you intended to leave the node untouched, return a NoChange marker instead`);
} else if (result === RemoveNode) {
debuggers[visitor.name](`Node of type '${typeOf(lastNode)}' removed`);
lastNode = RemoveNode;
resetAllListeners();
break; // Node has gone stale, stop applying visitors to it
} else if (result.__type === "defer") {
deferFuncs.push(result.func);
continue;
} else if (result.__raqbASTNode === true) {
// New subtree to replace the old one
if (result === node) {
// Visitor returned the original node again; but in this case, it should return NoChange instead. We enforce this because after future changes to the optimizer implementation (eg. using an internally-mutable deep copy of the tree), we may no longer be able to *reliably* detect when the original node is returned; so it's best to already get people into the habit of returning a NoChange marker in those cases, by disallowing this.
throw new Error(`Visitor returned original node, but this may not work reliably; if you intended to leave the node untouched, return a NoChange marker instead`);
} else {
debuggers[visitor.name](`Node of type '${typeOf(lastNode)}' replaced by node of type '${typeOf(result)}'`);
lastNode = result;
resetAllListeners();
break; // Node has gone stale, stop applying visitors to it
}
} else {
throw new Error(`Visitor returned an unexpected type of return value: ${util.inspect(result)}`);
}
}
if (lastNode !== node) {
// We re-evaluate the new node before leaving control to the children handler, as the old one has been substituted, and therefore new visitors might be applicable.
// FIXME: Is RemoveNode getting handled correctly here?
// FIXME: This needs to be moved outside of this function to also re-apply other visitors correctly
return applyVisitors(lastNode);
} else {
return lastNode;
}
}
}
node = applyVisitors(node);
// FIXME: Eventually hardcode the available properties for different node types (and them being single/multiple), for improved performance?
let changedProperties = {};
for (let [ property, value ] of Object.entries(node)) {
if (value == null) {
continue;
} else if (value.__raqbASTNode === true) {
let newValue = handle(value, stateHandlers);
if (newValue !== value) {
changedProperties[property] = newValue;
}
} else if (Array.isArray(value) && value.length > 0 && value[0].__raqbASTNode === true) {
// NOTE: We assume that if an array in an AST node property contains one AST node, *all* of its items are AST nodes. This should be ensured by the input wrapping in the operations API.
// eslint-disable-next-line no-loop-func
let newValues = value.map((item) => handle(item, stateHandlers));
if (newValues.some((newValue, i) => newValue !== value[i])) {
changedProperties[property] = newValues.filter((item) => item !== RemoveNode);
}
} else {
// Probably some kind of literal value; we don't touch these.
continue;
}
}
if (Object.keys(changedProperties).length === 0) {
return node;
} else {
let newNode = Object.assign({}, node, changedProperties);
// FIXME: Think carefully about whether there is *ever* a valid reason to remove a single node! Array items are already taken care of above, and leave an empty array at worst, which can make sense. Possibly we even need to encode this data into node type metadata.
for (let [ key, value ] of Object.entries(newNode)) {
if (value === RemoveNode) {
delete newNode[key];
}
}
return newNode;
}
// FIXME: Possibly optimize the "node gets returned unchanged" case, somehow? Perhaps by propagating the NoChange marker? But object creation is fast, so that may actually make things slower than just blindly creating new objects...
// return syncpipe(node, [
// (_) => applyVisitors(_),
// (_) => handleChildren(_)
// ]);
}
let { value: rootNode, time } = measureTime(() => {
return handle(ast, {});
});
// let timeSpentInOptimizers = Object.values(timings).reduce((sum, n) => sum + n, 0n);
let timeSpentInOptimizers = Object.values(timings).reduce((sum, n) => sum + n, 0);
if (rootNode !== RemoveNode) {
return {
ast: rootNode,
timings: {
"# Total": time,
"# Walker overhead": time - timeSpentInOptimizers,
... timings,
}
};
} else {
unreachable("Root node was removed");
}
};
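// A minimal usage sketch (the require path and optimizer names here are placeholders):
//
//   const optimizeTree = require("./optimize");
//   let { ast: optimizedAST, timings } = optimizeTree(queryAST, [ someOptimizer, anotherOptimizer ]);
//   // `timings` contains the per-optimizer time spent, plus "# Total" and "# Walker overhead" entries.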

@ -0,0 +1,85 @@
"use strict";
/* eslint-disable no-undef */
/*
SELECT
reviews.movie_id,
movie.title AS title,
COUNT(reviews.*) AS positive_review_count
FROM reviews
JOIN
movies AS movie ON reviews.movie_id = movie.id
WHERE
reviews.rating > 3
AND movie.title LIKE '%Movie%'
GROUP BY (reviews.movie_id, movie.id);
*/
query = select("reviews", [
define({ movie: belongsTo("movie_id") }),
where({
rating: moreThan(3),
movie: { title: includes("Movie") }
}),
collapseBy("movie_id", [
compute({ positive_review_count: count() }),
renameColumn("movie.title", "title")
])
]);
/*
SELECT
movies.*,
COUNT(reviews_1.*) AS positive_review_count
FROM movies
LEFT JOIN
reviews AS reviews_1 ON
movies.id = reviews_1.movie_id
AND reviews_1.rating > 3
GROUP BY movies.id;
NOTES:
- Must be a LEFT JOIN, to ensure that the rows from the parent table are always present
- Foreign predicates are in the ON, not the WHERE; that way they filter the foreign inputs, not the JOIN output?
MARKER: Continue fleshing out the mental model for all these different features, and how they translate to SQL. Also figure out where *non*-relation virtual tables fit into the picture, with aggregates and such - especially whether they also require the primary-key GROUP BY.
*/
query = select("movies", [
compute({ // better name for withDerived?
positive_review_count: count(has("reviews.movie_id", [
where({ rating: moreThan(3) })
]))
})
]);
query = select("movies", [
define({ reviews_1: has("reviews.movie_id", [
where({ rating: moreThan(3) })
])}),
compute({ // better name for withDerived?
positive_review_count: count("reviews_1")
})
]);
/*
SELECT
movies.*,
COUNT(reviews_1.*) AS positive_review_count
FROM movies
JOIN
reviews AS reviews_1 ON movies.id = reviews_1.movie_id
WHERE reviews_1.rating > 3
GROUP BY movies.id;
*/
query = select("movies", [
define({
releases: has("releases.movie_id"),
positive_reviews: has("reviews.movie_id", [
where({ rating: moreThan(3) })
])
}),
compute({ // better name for withDerived?
positive_review_count: count("positive_reviews"),
initial_release_year: lowestOf("releases.year")
})
]);

@ -5,7 +5,7 @@ Error.stackTraceLimit = 100;
const util = require("util");
const chalk = require("chalk");
let { select, onlyColumns, where, withRelations, withDerived, column, through, inValues, unsafeSQL, postProcess, belongsTo, has, value, parameter, not, anyOf, allOf, lessThan, moreThan, alias, foreignColumn, table, expression, equals } = require("../src/operations");
let { select, onlyColumns, where, withRelations, withDerived, column, through, inValues, unsafeSQL, postProcess, belongsTo, has, value, parameter, not, anyOf, allOf, lessThan, moreThan, alias, foreignColumn, table, expression, equals, collapseBy, compute, hierarchical, sum, count, addColumns } = require("../src/operations");
const astToQuery = require("../src/ast-to-query");
const optimizeAST = require("../src/ast/optimize");
const measureTime = require("../src/measure-time");
@ -31,11 +31,6 @@ let c = 452;
try {
let buildingResult = measureTime(() => {
// query = expression({
// left: "foo",
// condition: equals("bar")
// });
// Edit me!
/* Available functions:
@ -47,122 +42,50 @@ try {
expression, unsafeSQL
*/
let niceNumbers = anyOf([ 1, 2, 3 ]);
query = select("projects", [
where({
number_one: niceNumbers,
number_two: niceNumbers
}),
where({
number_three: anyOf([ 42, column("number_one") ]),
number_four: null
})
]);
// FIXME: Test duplicate fields, in different WHEREs, eg. number_three
// let niceNumbers = anyOf([ 1, 2, 3 ]);
// query = select("projects", [
// onlyColumns([ "foo" ]),
// where({
// number_one: niceNumbers,
// number_two: niceNumbers
// }),
// where({
// number_three: anyOf([ 42, column("number_one") ]),
// number_four: 1337
// number_four: moreThan(1337)
// })
// ]);
// query = select("projects", [
// where({
// // foo: anyOf([ "bar", not(not("baz")), anyOf([ "bar2", "baz2" ]), unsafeSQL("TRUE") ]),
// // qux: anyOf([ 13, moreThan(42) ]),
// complex: anyOf([
// 30,
// 40,
// allOf([
// moreThan(100),
// lessThan(200),
// lessThan(parameter("max"))
// ])
// ])
// }),
// // where({ second: 2 }),
// // where(not({
// // thirdA: 3,
// // thirdB: 3
// // })),
// // where(anyOf([ { foo: "bar" } ]))
// ]);
// query = select("projects", [
// onlyColumns([
// "foo",
// alias("bar", 42),
// alias("baz", sql("foo"))
// ]),
// where(anyOf([
// { foo: "bar", qux: anyOf([ "quz", "quy" ]) },
// { baz: lessThan(42) }
// ]))
// ]);
/* {
query: 'SELECT foo, ? AS bar, foo AS baz FROM projects WHERE foo = ? OR baz < ?;',
params: [ 42, 'bar', 42 ],
placeholders: []
} */
// query = select("projects", [
// onlyColumns([ "id", "name" ]),
// where({
// active: true,
// visible: true,
// // primary_category_id: anyOf([ 2, 3, 5, 7, 8 ])
// // primary_category_id: anyOf(parameter("categoryIDs"))
// primary_category_id: not(anyOf(parameter("categoryIDs"))) // FIXME/MARKER: This gets stringified wrong!
// }),
// // FIXME: where pivot table entry exists for category in that list
// withRelations({
// primaryCategory: belongsTo("primary_category_id", { query: [ withOwner() ] }),
// categories: through([
// has("projects_categories.project_id", { query: [
// // Optional extra clauses for the query on the pivot table, eg. for filtering entries
// where({ adminApproved: true })
// ]}),
// "category_id"
// ]),
// // all user groups for a given project ID -> all memberships for the given user group IDs -> for each membership, the record referenced by the given user_id
// users: through([ "user_groups.project_id", "membership.user_group_id", "user_id" ]),
// // ... expands to ...
// // users: through([
// // has({ column: foreignColumn({ table: "user_groups", column: "project_id" }) }),
// // has({ column: foreignColumn({ table: "memberships", column: "user_group_id" }) }),
// // belongsTo({ column: column("user_id") }),
// // ]),
// owner: "owner_id",
// // ... expands to
// // owner: belongsTo({ column: "owner_id" }),
// releases: "releases.project_id",
// // ... expands to ...
// // releases: has({ column: "releases.project_id" })
// }),
// withDerived({
// capitalized_name: sql("UPPER(name)"),
// team_count: sql("moderator_count + admin_count"),
// // fourty_two: value(42), // This makes no sense in withDerived!
// name_distance: (project) => wordDistanceAlgorithm(project.name, "someReferenceName") // NOTE: This could have returned a Promise!
// }),
// mapCase({ from: "snake", to: "camel" })
// ]);
/*
Generation timings:
Building: 7.79ms
Stringifying: 13.38ms
Optimization timings:
# Total: 6.07ms
# Walker overhead: 1.16ms
collapse-where: 0.50ms
conditions-to-expressions: 1.08ms
flatten-not-predicates: 0.00ms
flatten-predicate-lists: 0.21ms
arrayify-predicate-lists: 3.12ms
{
query: 'SELECT * FROM projects WHERE number_one = ANY(?) AND number_two = ANY(?) AND number_three = ANY(ARRAY[?, number_one]) AND number_four > ?;',
params: [ [ 1, 2, 3 ], [ 1, 2, 3 ], 42, 1337 ],
placeholders: []
}
*/
// SELECT country_id, store_id, color, size, COUNT(*) AS total_sold, SUM(price) AS total_revenue FROM sales GROUP BY color, size, ROLLUP (country_id, store_id);
query = select("sales", [
collapseBy([ "color", "size", hierarchical([ "country_id", "store_id" ]) ], [
compute({
total_sold: count(),
total_revenue: sum("price")
})
])
]);
});
console.log(util.inspect(query, { depth: null, colors: true }));

File diff suppressed because it is too large

@ -0,0 +1,434 @@
"use strict";
// JOIN
query = combine([
select("nodes", [
where({ id: parameter("id") })
]),
select("node_revisions", [
where({ node_id: foreignColumn("nodes.id") })
])
]);
// UNION
let whereClause = { id: parameter("id") };
query = concatenate([
select("old_nodes", [ whereClause ]),
select("new_nodes", [ whereClause ]),
]);
// GROUP BY / aggregates
query = select("weather_reports", [
where({ city: startsWith("S") }),
collapseBy("city", [
compute({ maximum_temperature: highestOf("temperature") }),
where({ maximum_temperature: lessThan(40) })
])
]);
query = select("reviews", [
where({ rating: moreThan(3) }),
collapseBy([ "movie_source", "movie_id" ], [
compute({ positive_review_count: count() })
])
]);
// movies: data_source, data_id, title, PRIMARY(data_source, data_id)
// reviews: id, movie_source, movie_id, rating, FOREIGN(movie_source, movie_id) -> movies .data_source, .data_id
// intended output: [ title, count ] for each movie whose title starts with an A
// NOTE: Conceptually, a relation is a 'virtual table' that references all the associated foreign rows for a given local row
// For something like the below, actually return the `movie.title` field as a column named exactly that by default, using quotes to be able to include the dot. Nevertheless, the user might want to *rename* certain columns in a constrained query like a collapseBy query, where the columns are normally automatically determined based on the specified computations and grouping conditions; neither addColumns nor onlyColumns makes semantic sense for that (the latter should be reserved for removing certain columns from the results, and *should* work with collapseBy, just with constrained valid column names!), so we probably need a renameColumn construct that gets rid of the original `movie.title` column and eg. replaces it with `movie_title`.
// FIXME: Think carefully about how the semantics of belongsTo would change when used in defineAs rather than withRelations. Ideally the semantics shouldn't change at all. However, if it's also possible for someone to specify a `has` relation (which points at *multiple* foreign rows), then we need a way to deal with zero-or-more rows here anyway, maybe that can be the same semantics as in withRelations?
// I guess that "there must be at least one foreign row that matches the specified constraints" is a reasonable criterion for whether the local row is included or not? Then there should probably be a LIMIT 1 on the foreign query, to ensure that it only ever uses the values of whatever the first-encountered row is. This means there would be undefined behaviour when referring to variable fields, though.
// NOTE: Make sure that any design around this is also capable of dealing with composite keys! Maybe require a composite(...) wrapper for those, to distinguish from other array-ish things...
// Definitely need this to distinguish from GROUPING SETS in GROUP BY (which is also an array); otherwise ["brand", "size"] would be ambiguous.
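// For illustration, a hypothetical composite(...) wrapper (not implemented) would make the two readings explicit:
//   collapseBy([ composite([ "brand", "size" ]) ], [ ... ]) // one composite grouping key: every unique (brand, size) pair
//   collapseBy([ "brand", "size" ], [ ... ])                // grouping-sets-style: separate groupings per entry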
/*
SELECT
reviews.movie_id,
COUNT(reviews.*) AS positive_review_count
FROM reviews
JOIN
movies ON reviews.movie_id = movies.id
WHERE
reviews.rating > 3
GROUP BY reviews.movie_id;
*/
query = select("reviews", [
define({ movie: belongsTo("movie_id") }),
where({
rating: moreThan(3),
movie: { title: includes("Movie") }
}),
collapseBy("movie_id", [
compute({ positive_review_count: count() }),
renameColumn("movie.title", "title")
])
]);
// FIXME: What if we were to collapse by ID and then want to also get the movie title?
// reviews: id, movie_id, rating
// movies: id, title, release_year
query = select("movies", [
define({ reviews: has("reviews.movie_id") }),
where({
title: startsWith("A"),
reviews: { rating: moreThan(3) }
}),
collapseBy("release_year", [
compute({ positive_review_count: count("reviews") }), // FIXME: Ambiguity between table and column names here, maybe just default to table? Since column counts are rare
])
]);
// The below is functionally identical to the above; it is generally permitted to specify WHERE constraints for `define`d relations in the top-level WHERE
query = select("movies", [
define({ reviews: has("reviews.movie_id", [ where({ rating: moreThan(3) }) ]) }),
where({ title: contains("Movie") }),
collapseBy("release_year", [
compute({ positive_review_count: count("reviews") }), // FIXME: Ambiguity between table and column names here, maybe just default to table? Since column counts are rare
])
]);
// reviews: id, movie_id, rating
// movies: id, title, release_year
// SELECT movies.release_year, COUNT(reviews.*) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3 AND movies.title LIKE '%Movie%' GROUP BY movies.release_year;
// Differences from query below:
// 1) GROUP BY movies.release_year instead of primary key,
// 2) SELECT movies.release_year instead of movies.*, as per collapseBy semantics
// 3) Additional WHERE clause for movies.title
query = select("movies", [
where({ title: contains("Movie") }),
collapseBy("release_year", [
compute({
positive_review_count: count(has("reviews.movie_id", [
{ rating: moreThan(3) }
]))
}),
])
]);
/*
SELECT
movies.*,
COUNT(reviews.*) AS positive_review_count
FROM movies
JOIN
reviews ON movies.id = reviews.movie_id
WHERE reviews.rating > 3
GROUP BY movies.id;
*/
// NOTE: The GROUP BY is necessary because otherwise COUNT (which is an aggregate function) cannot be used; the field name to use there, however, cannot be statelessly determined as it requires information about the primary key of the table
// SELECT movies.*, COUNT(reviews.*) OVER (PARTITION BY reviews.movie_id) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3;
// ^ Produces the correct data, but duplicates rows; window functions may be useful for combining aggregated information and direct rows, though
// SELECT DISTINCT movies.*, COUNT(reviews.*) OVER (PARTITION BY reviews.movie_id) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3;
// ^ Works! But may be slower? As it has to actually compare the resultset data, as opposed to eliminating rows by their `id`
query = select("movies", [
compute({ // better name for withDerived?
positive_review_count: count(has("reviews.movie_id", [
{ rating: moreThan(3) }
]))
})
]);
// subqueries and VALUES (virtual tables)
// FIXME: subqueries can result in scalar values, somehow, in some cases? Need to look into the details of this
query = select("films", [
defineAs("archived_rental_rates", select("films", [
compute({ average: averageOf("rental_rate") }),
where({ archived: true })
])),
// Alternatively?
defineAs("rental_rates.average", averageOf("rental_rate")),
where({ rental_rate: moreThan(foreignColumn("archived_rental_rates.average")) })
]);
let virtualCustomersTable = virtualTable([{
first_name: "anne",
last_name: "smith",
age: 42
}, {
first_name: "bob",
last_name: "jones",
age: 28
}, {
first_name: "joe",
last_name: "blow",
age: 49
}]);
query = select(virtualCustomersTable, [
where({ first_name: "anne" })
]);
query = select("customers", [
defineAs("target_customers", virtualCustomersTable),
// withQueryAs("average_age", select("target_customers", [
// compute({ average: averageOf("age") })
// ])),
// NOTE: The below would need to be translated into an aggregate subquery that is then referenced from the parent query; this case is identified by an aggregate function being called with a *foreign* column name. For this, it may need to additionally insert data into the AST that signals to an optimizer that a subquery needs to be produced somewhere further up the AST
// FIXME: Ambiguity between "foreign column in a JOIN" and "foreign column referring to a virtual subquery table"; especially in the case where combine and defineAs are both used, and the scoping in the raqb might differ from that in the SQL query? Though it probably isn't allowed to redefine names like that anyway?
where({ age: moreThan(averageOf("target_customers.age")) })
]);
query = select("fdt", [
where({
c1: anyOf(listFrom(select("t2", [
onlyColumns([ "c3" ]),
where({ c2: add([
foreignColumn("fdt.c1"),
10
]) })
])))
})
]);
query = select("table1", [
where({
column1: anyOf(listFrom("table2", "column3"), [
where({ column2: add([ 10, foreignColumn("table1.column1") ]) })
])
})
]);
// https://docs.actian.com/actianx/11.1/index.html#page/SQLRef/Scalar_Subqueries.htm
// NOTE: `compute` assumes the value side to implicitly be a possiblyForeignColumnName. Literal values must be explicit.
// NOTE: There is a second form of `compute` that only produces a single, scalar value.
let highestSalary = valueFrom("employees", compute(highestOf("salary")));
let averageCommission = valueFrom("commissions", compute(averageOf("bonus")));
query = select("employees", [
define("department", belongsTo("department_id")),
where({ department: { name: "finance" } }),
compute({
employee_name: "emp_name",
department_name: "department.name",
average_commission: averageCommission,
highest_salary: highestSalary
})
]);
// More complex version of the above, comparing only salaries in Finance
let financeEmployees = select("employees", [
define("department", belongsTo("department_id")),
where({ department: { name: "finance" } })
]);
query = select("employees", [
define("department", belongsTo("department_id")),
define("commissions", select("commissions")),
define("finance_employees", financeEmployees),
where({ department: { name: "finance" } }),
compute({
employee_name: "emp_name",
department_name: "department.name",
average_commission: averageOf("commissions.bonus"),
average_salary: averageOf("finance_employees.salary"),
highest_salary: highestOf("finance_employees.salary")
})
]);
query = select("sales", [
collapseBy([ "product_id", hierarchical([ "country_id", "city_id", "store_id" ]) ], [
compute({
total_sold: count(),
total_revenue: sum("revenue")
})
])
]);
// Or, when the data is in normalized form:
query = select("sales", [
define({
country: belongsTo("city.country_id"),
city: belongsTo("store.city_id"),
store: belongsTo("store_id")
}),
collapseBy([ "product_id", hierarchical([ "country.id", "city.id", "store_id" ]) ], [
compute({
total_sold: count(),
total_revenue: sum("revenue")
})
])
]);
// Or, inverted:
query = select("countries", [
define({
city: has("cities.country_id"),
store: has("stores.city_id"),
sales: has("sales.store_id")
}),
collapseBy([ "sales.product_id", hierarchical([ "id", "city.id", "store_id" ]) ], [
compute({
total_sold: count("sales"),
total_revenue: sum("sales.revenue")
})
])
]);
// Multiple collapseBy clauses:
// SELECT color, size, (CASE WHEN size IS NOT NULL THEN SUM(price) ELSE COUNT(*) END) AS total_sales FROM sales GROUP BY GROUPING SETS ( color, (color, size) );
// This introduces a lot of query analysis complexity, let's not implement it unless someone actually needs it...
query = select("sales", [
collapseBy("color", [
compute({ total_sales: count() })
]),
collapseBy(permutationsOf([ "color", "size" ]), [
compute({ total_sales: sum("price") })
])
]);
// FIXME: count
// FIXME: Special case, handling clauses that cannot exist within an ON, eg. ORDER BY
// FIXME: Test duplicate fields, in different WHEREs, eg. number_three
// NOTE: Need to use table aliases for self-combines/self-relations
// FIXME: cartesianProduct(...) wrapper for combine. in addition to anyOf/allOf
// FIXME: startsWith -> LIKE FOO%, endsWith -> LIKE %FOO, includes -> LIKE %FOO% -- figure out input escaping here!
// FIXME: does LIKE work with ANY/ALL?
// FIXME: Performance-class optimizer that moves non-aggregate conditions out of the collapseBy conditions and into the parent select
// FIXME: Document that collapseBy will override the selected columns, and disallow the usage of addColumns/onlyColumns (and why that is)
// FIXME: Actually implement that limitation, too!
// FIXME: Is there an 'array' aggregate function for when someone wants all of the unique items within a group, or something like that?
// FIXME: optionalArrayOf combinator, and permit omitting the array in all cases where an array is expected and an array of 1 item would be valid
// In practice this means permitting omitting the array (and implicitly wrapping it) anywhere, since it will then hit the regular item count check anyway.
// FIXME: Disallow the use of both compute and collapseBy within a `select`; require the compute to be inside the collapseBy, in that case
// FIXME: Table functions in place of a table name
// FIXME: "create function" and "create view" interfaces
// NOTE: Ensure that no matter where column references are provided, local column references *always* refer to the currently-containing context
// Need to go through all the possible combinations of constructs, and verify that this holds true, also for eg. sub-queries
// NOTE: Sometimes subqueries should be defined using WITH rather than inline or in a FROM, eg. if they do not appear in the output through a JOIN
// FIXME: IN ... clauses can accept subqueries, is this also true for ANY/ALL?
// NOTE: Subqueries in WHERE clauses can reference the outer query (lexical scoping), ie. the item being evaluated! Make sure that this is represented well in the API design.
// FIXME: Have an 'optimizer' (new 'correctness' category?) that verifies that all foreign column references are semantically valid inside of a given query? Are there cases where some sort of predefined table can be accessed without defining it in the query? If yes, it should probably be required to define this upfront in raqb queries.
// FIXME: Figure out how ordering relates to JOINs
// FIXME: JSON queries, via jsonQuery({ ... }) wrapper or so
// FIXME: Implement nested WHERE for foreign constraints
// FIXME: Bulk inserts! Should make sure that VALUES (which is probably needed for this) does not require a parameter for every single individual value
// FIXME: Figure out a solution for update/delete with LIMIT
// FIXME: initializeWith for column additions in schema (create with nullable -> update -> set non-nullable)
// This is different from defaultTo(...), which continues to be applicable *after* creating the column
// FIXME: Non-parameterizable placeholder representation, for dealing with things like currently-unknown primary keys, that will only be known upon query execution (as the executor knows the table structure)
// FIXME: Replace aliases with `compute`
query = createTable("films", {
fields: {
id: primaryKey(),
title: text(),
description: [ text(), canBeNull() ],
},
indexes: [
compositeIndex([ "data_source", "data_source_id" ])
]
});
combine(/* [ clause ], allOf([ clause ]), anyOf([ clause]) */);
// let niceNumbers = anyOf([ 1, 2, 3 ]);
// query = select("projects", [
// where({
// number_one: niceNumbers,
// number_two: niceNumbers
// }),
// where({
// number_three: anyOf([ 42, column("number_one") ]),
// number_four: 1337
// })
// ]);
// query = select("projects", [
// where({
// // foo: anyOf([ "bar", not(not("baz")), anyOf([ "bar2", "baz2" ]), unsafeSQL("TRUE") ]),
// // qux: anyOf([ 13, moreThan(42) ]),
// complex: anyOf([
// 30,
// 40,
// allOf([
// moreThan(100),
// lessThan(200),
// lessThan(parameter("max"))
// ])
// ])
// }),
// // where({ second: 2 }),
// // where(not({
// // thirdA: 3,
// // thirdB: 3
// // })),
// // where(anyOf([ { foo: "bar" } ]))
// ]);
// query = select("projects", [
// onlyColumns([
// "foo",
// alias("bar", 42),
// alias("baz", sql("foo"))
// ]),
// where(anyOf([
// { foo: "bar", qux: anyOf([ "quz", "quy" ]) },
// { baz: lessThan(42) }
// ]))
// ]);
/* {
query: 'SELECT foo, ? AS bar, foo AS baz FROM projects WHERE foo = ? OR baz < ?;',
params: [ 42, 'bar', 42 ],
placeholders: []
} */
// query = select("projects", [
// onlyColumns([ "id", "name" ]),
// where({
// active: true,
// visible: true,
// // primary_category_id: anyOf([ 2, 3, 5, 7, 8 ])
// // primary_category_id: anyOf(parameter("categoryIDs"))
// primary_category_id: not(anyOf(parameter("categoryIDs"))) // FIXME/MARKER: This gets stringified wrong!
// }),
// // FIXME: where pivot table entry exists for category in that list
// withRelations({
// primaryCategory: belongsTo("primary_category_id", { query: [ withOwner() ] }),
// categories: through([
// has("projects_categories.project_id", { query: [
// // Optional extra clauses for the query on the pivot table, eg. for filtering entries
// where({ adminApproved: true })
// ]}),
// "category_id"
// ]),
// // all user groups for a given project ID -> all memberships for the given user group IDs -> for each membership, the record referenced by the given user_id
// users: through([ "user_groups.project_id", "membership.user_group_id", "user_id" ]),
// // ... expands to ...
// // users: through([
// // has({ column: foreignColumn({ table: "user_groups", column: "project_id" }) }),
// // has({ column: foreignColumn({ table: "memberships", column: "user_group_id" }) }),
// // belongsTo({ column: column("user_id") }),
// // ]),
// owner: "owner_id",
// // ... expands to
// // owner: belongsTo({ column: "owner_id" }),
// releases: "releases.project_id",
// // ... expands to ...
// // releases: has({ column: "releases.project_id" })
// }),
// withDerived({
// capitalized_name: sql("UPPER(name)"),
// team_count: sql("moderator_count + admin_count"),
// // fourty_two: value(42), // This makes no sense in withDerived!
// name_distance: (project) => wordDistanceAlgorithm(project.name, "someReferenceName") // NOTE: This could have returned a Promise!
// }),
// mapCase({ from: "snake", to: "camel" })
// ]);

@ -148,6 +148,24 @@ x Flatten nested same-typed allOfExpression/anyOfExpression
-----
Issue tracker template:
<!--
Hi! Thanks for trying out raqb. If you're reading this, that means you've probably either found a bug, or you're not quite sure how to use something. We'd be happy to help!
To help us help you, please provide as much detail about your problem as possible. This includes example code, a description of what your goal is and the usecase it's for, exactly how it's failing (eg. a copy-pasted error message), and so on. The more detail you provide, the faster we can find a solution together!
When posting code, please provide as much surrounding code as you can, and change it as little as possible from how it looks in your project. The more context you provide, the easier it is for us to understand what you're trying to do. It's okay for your code to be messy!
Please also keep in mind the following things, which are unlike most other open-source projects:
- raqb is an open-source project in every sense of the term. It's meant to empower open-source (especially non-commercial) developers to build better software, faster. Because of this, we will not implement features in raqb that are meant to solve problems specific to proprietary software (eg. private optimizers). You are still free to use raqb in proprietary software, as the license allows it - but you're expected to be a good open-source citizen, and contribute back your improvements.
- Technology and politics are inseparable. Software is infrastructure, and the choices that software developers make directly and indirectly impact the lives of millions of people - disproportionately those with less power in society. Because of this, we WILL NOT PROVIDE SUPPORT to ethically objectionable organizations. This includes, but is not limited to, organizations that build software for deportation, and the adtech industry. You are not welcome in our community.
You can remove this block of text after reading it, or start typing after the arrow below. Either way, this block of text will not be visible in your issue.
-->
-----
MARKER:
- Refactor relation operations to new design
- Implement AST optimization infrastructure (incl. solving the immutable reference problem?)
@ -157,3 +175,148 @@ MARKER:
NOTE: May need https://www.npmjs.com/package/browser-hrtime for process.hrtime.bigint support in browsers! Need to investigate whether this (or something similar) is already being used by bundlers by default, or whether they use a shim without bigint support.
FIXME: Remove all the .type stuff and replace with typeOf()
FIXME: Document that `DEBUG=raqb:ast:optimize:*` can be used for tracking down unstable optimizers (symptom = stack overflow)
Pitch: "Easier than MongoDB", no more if(foo.bar) checks, no more data format mismatches, the database ensures for you that all data looks like expected, and it's easy to change the format if needed.
------
# PostgreSQL design notes
VALUES: virtual, in-memory table
JOIN: combine fields of rows in multiple tables, correlating on some sort of inter-table predicate
INNER: only rows that exist in all tables, ie. with correlation
FULL: all rows in any table, even without correlation
LEFT/RIGHT: FULL for one side, INNER for the other
CROSS: every row in every table combined with every row in every other table (cartesian product)
LATERAL: modifier that marks a query as having a data dependency on some other query in the JOIN, eg. comparing against an aggregate function over another table
ex. https://heap.io/blog/engineering/postgresqls-powerful-new-join-type-lateral
ON <condition>: specifies the join condition
USING (<column>): shorthand for `ON a1.<column> = a2.<column>`
UNION: concatenate rows from multiple tables
GROUP BY <predicate>: produces 1 "collapsed" row for each unique <predicate> value, with some of the result columns possibly being the results of aggregate functions over all of the "collapsed" input values for that row
<- WHERE: controls inputs into the aggregate functions *without* knowledge of aggregate function results (more efficient, as it does not aggregate rows eliminated here)
HAVING: controls outputs of the aggregate functions and inputs into the results *with* knowledge of aggregate function results - note that this eliminates the "collapsed" group rows, *not* the individual input rows!
(should divert aggregate constraints into HAVING, and all others into WHERE)
ROLLUP -> hierarchyOf - (a, b, c), (a, b), (a), () -- cannot be nested!
CUBE -> permutationsOf (including implicit null for each slot) -- cannot be nested!
comma list -> permutationsOf (excluding implicit null for each slot)
# Table functions (things that produce tables) - note that some table functions might require explicit defining of output columns with `AS`, as it controls function behaviour
UNNEST: array(s) to rows, analogous to Array#values; one row per max(arrayLengths) array index, one column per input array
WITH ORDINALITY: analogous to Array#entries, add a column with the index ("ordinality") of each row; column name defaults to 'ordinality'
ROWS FROM(): like JOIN, but for table functions, correlated on index ("ordinality"); each individual function call can have its own `AS` clause
# GROUP BY combinator semantics
GROUP BY foo, bar; -- group by permutationsOf(foo, bar)
GROUP BY (foo, bar); -- group by compositeKey(foo, bar)
GROUP BY foo, (bar, baz) -- group by permutationsOf(foo, compositeKey(bar, baz))
GROUP BY CUBE(foo, bar) -- group by ???(foo, bar)
CUBE ( a, b, c )
is equivalent to
GROUPING SETS (
( a, b, c ),
( a, b ),
( a, c ),
( a ),
( b, c ),
( b ),
( c ),
( )
)
a, b, GROUPING SETS (c, d)
is equivalent to
GROUPING SETS (
( a, b, c ),
( a, b, d )
)
Limitations when grouping:
- Column selection list may only contain columns that are:
a) guaranteed to be identical across all collapsed rows (ie. they are specified as a GROUP BY constraint)
b) the results of aggregate functions over all of the collapsed rows
- WHERE may only contain column-references that apply directly to the to-be-collapsed rows, not wrapped in aggregate functions
- HAVING may only contain column-references that are:
a) wrapped in an aggregate function
b) specified in the column selection as guaranteed-identical columns
note: aliases to results of aggregate functions are *not* permitted here, for some reason
Aggregate functions:
- Specifying an aggregate function in the column selection list means an implicit `GROUP BY` where there is only a single group consisting of all rows!
- This means that aggregate functions cannot be combined with custom column selection, *even if* no grouping was specified.
- The same also happens when specifying a HAVING clause, though that in and of itself usually means there's going to be an aggregate function somewhere.
Joins:
- Need to consistently use ON (...) syntax for JOIN conditions, not WHERE, because WHERE does not work for outer joins.
# Mental model notes
- A JOIN is just a virtual table that is scoped to the currently-being-processed row. This also applies when `define`ing a has/belongsTo relation!
- A `compute` with an aggregate function on a local(!) column directly in a SELECT is equivalent to `collapseBy(value(true), [ compute(...) ])`, and means the whole query switches to collapsed mode (see the sketch below this list). But this is not applicable for aggregate functions on referenced (foreign) tables!
- A `collapseBy` + aggregate function essentially means uniqueValues(groupingColumn).map((value, rows) => [ value, aggregateFunction(rows) ])
- GROUPING SETS is effectively a UNION for the to-be-analyzed groups
- This means that when the user specifies multiple `collapseBy` clauses, we should produce a GROUPING SETS that contains each of them. If only a single one, we omit it.
- `GROUP BY (color, size)` is equivalent to `GROUP BY color, size`
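A minimal sketch of that first equivalence, in the raqb-style API used elsewhere in these notes (illustrative only; assumes `value(true)` can act as a constant grouping key):

    select("reviews", [ compute({ review_count: count() }) ])
    // behaves like:
    select("reviews", [ collapseBy(value(true), [ compute({ review_count: count() }) ]) ])
    // i.e. SELECT COUNT(*) AS review_count FROM reviews; -- a single group containing all rows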
# Base types
- Scalar values
- Lists (arrays of same-typed scalar values)
- Resultsets/Tables (grids of rows and schemaful columns)
# Query construction strategies
Observations:
- Whenever we are JOINing, we need to prefix column names with table names. We should probably have an optimizer that auto-converts local columnNames into foreignColumnNames in JOIN-y contexts, based on that context's local table name. We can't just keep them local, because PostgreSQL's semantics are different and not scope-dependent, so we have to be explicit about this.
- This requires a general-purpose context abstraction in the AST walker for tracking scope, eg. a `setContext` method passed to the visitor, the argument to which is then used to generate a new context for its walked children (but not used outside of that). Contexts can be shallow-merged, which allows for overriding (see the sketch after this list).
- Since the same table can be JOINed with more than once, we might need to generate incrementing numbers for the different cases, if no explicit names are specified. This is especially true for cases where relations are specified as inline references in eg. `compute` clauses.
- When computing aggregate functions over columns of relations, that constitutes an implicit GROUP BY own_primary_key.
- In GROUP BY clauses, it seems that parenthesized expression lists are only semantically meaningful inside of a CUBE/ROLLUP/GROUPING SETS, as in the top-level GROUP BY clause, the result is the same with or without parentheses.
- Where it *is* semantically meaningful, it denotes "produce a group for every permutation of the values in these columns"
- A single item in GROUPING SETS is equivalent to not using GROUPING SETS at all, similar to how it would work with a UNION.
- GROUP BY CUBE (a, b) is equivalent to GROUP BY (a, b) except when using CUBE, the 'pretend this field does not take part' condition is also considered for each field
- We'll ignore CUBE for now, and just implement (a, b) and ROLLUP (a, b)
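A rough sketch of what that context abstraction might look like from a visitor's point of view (illustrative only; `setContext` and the `localTable` field are assumptions, not implemented API):

    select: (node, { setContext }) => {
        // Everything walked underneath this node would see the (shallow-merged) context,
        // so eg. a columnName visitor could rewrite itself into a foreignColumnName
        // based on context.localTable.
        setContext({ localTable: node.table.name });
        return NoChange;
    }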
<query> = select, combine, concatenate
<relation> = has, belongsTo, through?
## collapseBy(field, [ <clauses> ])
## concatenate([ a, b, c ])
a UNION b UNION c, somehow
## combine([ a, b, c ])
a JOIN b JOIN c, somehow, taking into account column selection
## define({ name: <query> })
WITH <query> AS name
## define({ name: <relation> })
base JOIN <relation>; account for WHERE clauses on the <relation> specified in the top-level `where`, those may also be specified on the relation itself directly
normalize that in an optimizer!
does that also apply to withRelations?
## inline(<query>)
parenthesized subquery; possibly WITH if needed to make it referenceable elsewhere? need to think about this
## compute
Used to indicate derived values, eg.
a) sql(...) as column
b) database functions
c) JS postprocessing functions (not valid in collapseBy?)
GROUP BY color, size, ROLLUP (country_id, store_id)
for each country_id
for (each store_id + null)
for each color
for each size

@ -61,6 +61,7 @@
"eslint-plugin-react-hooks": "^4.0.5",
"express": "^4.17.1",
"nodemon": "^2.0.4",
"pick-random-weighted": "^1.2.3",
"react": "^16.13.1",
"react-dom": "^16.13.1",
"react-simple-code-editor": "^0.11.0"

@ -63,6 +63,17 @@ let is$Object = [
}
];
let is$ObjectArray = [
arrayOf([
required,
either([
[ isValue(NoQuery) ],
[ is$Object ]
])
]),
(items) => items.filter((item) => item !== NoQuery)
];
function $object({ query, params, placeholders }) {
validateOptions(arguments, is$ObjectParameters);
@ -76,17 +87,19 @@ function $object({ query, params, placeholders }) {
function $join(joiner, items) {
validateArguments(arguments, {
joiner: [ required, isString ],
items: [ required, arrayOf(is$Object) ]
items: [ required, is$ObjectArray ]
});
let nonEmptyItems = items.filter((item) => item !== NoQuery);
return $object({
query: items
query: nonEmptyItems
.map((item) => item.query)
.join(joiner),
params: items
params: nonEmptyItems
.map((item) => item.params)
.flat(),
placeholders: items
placeholders: nonEmptyItems
.map((item) => item.placeholders)
.flat()
});
@ -230,10 +243,27 @@ let process = {
return NoQuery;
}
});
let $groupByClause = asExpression(() => {
if (clausesByType.collapseBy.length > 0) {
// NOTE: We currently only support a single collapseBy clause
let collapseColumns = clausesByType.collapseBy[0].columns.columns;
return $combine`GROUP BY ${$join(", ", $handleAll(collapseColumns))}`;
} else {
return NoQuery;
}
});
let $columnSelection = columnList({ onlyColumns, addColumns });
return $combine`SELECT ${$columnSelection} FROM ${$table} ${$whereClause}`;
let $orderedClauses = [
$table,
$whereClause,
$groupByClause
];
return $combine`SELECT ${$columnSelection} FROM ${$join(" ", $orderedClauses)}`;
},
tableName: function ({ name }) {
// FIXME: escape
@ -256,6 +286,9 @@ let process = {
params: []
});
},
hierarchical: function ({ columns }) {
return $combine`ROLLUP (${$join(", ", $handleAll(columns))})`;
},
alias: function ({ column, expression }) {
// FIXME: escape
let $column = $handle(column);
@ -332,6 +365,13 @@ let process = {
params: [ placeholder ],
placeholders: [ placeholder.name ]
});
},
aggregrateFunction: function ({ functionName, args }) {
let $functionName = $object({
query: functionName.toUpperCase()
});
return $combine`${$functionName}(${$join(", ", $handleAll(args))})`;
}
};

@ -0,0 +1,20 @@
"use strict";
module.exports = function combineOptimizers(optimizers) {
let allVisitors = {};
for (let optimizer of optimizers) {
for (let [ key, visitor ] of Object.entries(optimizer.visitors)) {
if (allVisitors[key] == null) {
allVisitors[key] = [];
}
allVisitors[key].push({
name: optimizer.name,
func: visitor
});
}
}
return allVisitors;
};

@ -0,0 +1,16 @@
"use strict";
const debug = require("debug");
module.exports = function createDebuggers(optimizers) {
let debuggers = {};
for (let optimizer of optimizers) {
debuggers[optimizer.name] = debug(`raqb:ast:optimize:${optimizer.name}`);
debuggers[`${optimizer.name} (deferred)`] = debug(`raqb:ast:optimize:${optimizer.name} (deferred)`);
}
debuggers["(subtree change)"] = debug(`raqb:ast:optimize:(subtree change)`);
return debuggers;
};

@ -0,0 +1,27 @@
"use strict";
module.exports = function createHandlerTracker() {
let handlers = new Map();
return {
add: function (name, func) {
if (!handlers.has(name)) {
handlers.set(name, []);
}
handlers.get(name).push(func);
},
call: function (name, value) {
let funcs = handlers.get(name);
if (funcs != null) {
for (let func of funcs) {
func(value);
}
}
},
has: function (name) {
return handlers.has(name);
}
};
};
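// A minimal usage sketch of the tracker above:
//
//   let handlers = createHandlerTracker();
//   handlers.add("columnName", (value) => console.log("saw column:", value));
//   handlers.has("columnName");               // true
//   handlers.call("columnName", "movie_id");  // invokes every handler registered under that name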

@ -1,177 +1,236 @@
/* eslint-disable no-loop-func */
"use strict";
// Design note: We return stateLogs instead of passing in an object of registered handlers to call, because a node can become obsolete in mid-processing, and in those cases all of its state sets should be ignored. By far the easiest way to implement this, is to just keep a stateLog in the node handling context (since that entire context gets thrown away when processing gets aborted due to a subtree change), and let the parent deal with actually applying any still-relevant setStates to the correct handler functions.
// FIXME: Figure out a way to track 'loss factor' per optimizer, ie. how many (partial or complete) node evaluations have been discarded due to the actions of that optimizer, including subtrees. This can give insight into which optimizers cause unreasonably much wasted work.
const util = require("util");
const syncpipe = require("syncpipe");
const debug = require("debug");
const splitFilter = require("split-filter");
const mapObj = require("map-obj");
const defaultValue = require("default-value");
const NoChange = require("../../optimizers/util/no-change");
const RemoveNode = require("../../optimizers/util/remove-node");
const unreachable = require("../../unreachable");
const typeOf = require("../../type-of");
const concat = require("../../concat");
const deriveNode = require("../../derive-node");
const measureTime = require("../../measure-time");
const unreachable = require("../../unreachable");
// FIXME: Consider deepcopying the tree once, and then mutating that tree, instead of doing everything immutably; this might be significantly faster when a few iterations are needed to stabilize the tree, as that might otherwise result in many copies of the subtree(s) leading up to the changed node(s), one for each iteration.
// FIXME: Consider whether inverting the evaluation order (deepest-first rather than shallowest-first) can remove the need for multiple optimization passes and stabilization detection.
// FIXME: Verify that changed nodes actually result in a change in where the walker goes!
const createHandlerTracker = require("./handler-tracker");
const createTimings = require("./timings-tracker");
const combineOptimizers = require("./combine-optimizers");
const createDebuggers = require("./create-debuggers");
function createDebuggers(optimizers) {
let debuggers = {};
// FIXME: Implement a scope tracker of some sort, to decouple the code here a bit more
for (let optimizer of optimizers) {
debuggers[optimizer.name] = debug(`raqb:ast:optimize:${optimizer.name}`);
}
let EVALUATION_LIMIT = 10;
return debuggers;
function defer(func) {
return { __type: "defer", func: func };
}
function createTimings(optimizers) {
let timings = {};
function handleNodeChildren(node, handle) {
let changedProperties = {};
let stateLogs = [];
for (let optimizer of optimizers) {
// timings[optimizer.name] = 0n;
timings[optimizer.name] = 0;
}
for (let [ property, value ] of Object.entries(node)) {
if (value == null) {
continue;
} else if (value.__raqbASTNode === true) {
let result = handle(value);
return timings;
}
if (result.stateLog.length > 0) {
stateLogs.push(result.stateLog);
}
function combineOptimizers(optimizers) {
let allVisitors = {};
if (result.node !== value) {
changedProperties[property] = result.node;
}
} else if (Array.isArray(value) && value.length > 0 && value[0].__raqbASTNode === true) {
// NOTE: We assume that if an array in an AST node property contains one AST node, *all* of its items are AST nodes. This should be ensured by the input wrapping in the operations API.
// eslint-disable-next-line no-loop-func
let results = value.map((item) => handle(item));
let newStateLogs = results
.filter((result) => result.stateLog.length > 0)
.map((result) => result.stateLog);
for (let optimizer of optimizers) {
for (let [ key, visitor ] of Object.entries(optimizer.visitors)) {
if (allVisitors[key] == null) {
allVisitors[key] = [];
if (newStateLogs.length > 0) {
stateLogs.push(... newStateLogs);
}
allVisitors[key].push({
name: optimizer.name,
func: visitor
});
let newNodes = results.map((result) => result.node);
let hasChangedItems = newNodes.some((newNode, i) => newNode !== value[i]);
if (hasChangedItems) {
changedProperties[property] = newNodes.filter((item) => item !== RemoveNode);
}
} else {
// Probably some kind of literal value; we don't touch these.
continue;
}
}
return allVisitors;
return {
changedProperties: changedProperties,
stateLog: concat(stateLogs)
};
}
// FIXME: StopMatching marker to signal that eg. a generic visitor should no longer match after a specific one?
// FIXME: OriginalNode marker to explicitly indicate that any transformations applied by *other* visitors should be thrown out?
module.exports = function optimizeTree(ast, optimizers) {
// NOTE: Depth-first!
let debuggers = createDebuggers(optimizers);
let visitors = combineOptimizers(optimizers);
let timings = createTimings(optimizers);
let debuggers = createDebuggers(optimizers);
// FIXME: Dirty tracking for stabilization detection
function applyVisitors(node, visitors) {
if (visitors == null) {
// We handle this here to make the `handle` pipeline more readable
return node;
} else {
let lastNode = node;
let visitorsByType = mapObj(visitors, (key, value) => {
return [
key,
concat([
defaultValue(value, []),
defaultValue(visitors["*"], []),
])
];
});
for (let visitor of visitors) {
// eslint-disable-next-line no-loop-func
let { value: result, time } = measureTime(() => {
return visitor.func(lastNode);
});
function handleNode(node, iterations = 0) {
// The stateLog contains a record of every setState call that was made during the handling of this node and its children. We keep a log for this rather than calling handlers directly, because setState calls should always apply to *ancestors*, not to the current node. That is, if the current node does a setState for `foo`, and also has a handler registered for `foo`, then that handler should not be called, but the `foo` handler in the *parent* node should be.
// FIXME: Scope stateLog entries by optimizer name? To avoid name clashes for otherwise similar functionality. Like when multiple optimizers track column names.
let stateLog = [];
let defers = [];
let handlers = createHandlerTracker();
let nodeVisitors = visitorsByType[node.type];
function handleResult({ debuggerName, result, permitDefer }) {
if (result === NoChange) {
// no-op
} else if (result == null) {
// FIXME: Improve this error so that it actually tells you in what visitor things broke
throw new Error(`A visitor is not allowed to return null or undefined; if you intended to leave the node untouched, return a NoChange marker instead`);
} else if (result === RemoveNode) {
debuggers[debuggerName](`Node of type '${typeOf(node)}' removed`);
return { node: RemoveNode, stateLog: [] };
} else if (result.__type === "defer") {
if (permitDefer) {
debuggers[debuggerName](`Defer was scheduled for node of type '${typeOf(node)}'`);
defers.push({ debuggerName, func: result.func });
} else {
throw new Error(`Cannot schedule a defer from within a defer handler`);
}
} else if (result.__raqbASTNode === true) {
if (result === node) {
// Visitor returned the original node again; but in this case, it should return NoChange instead. We enforce this because after future changes to the optimizer implementation (eg. using an internally-mutable deep copy of the tree), we may no longer be able to *reliably* detect when the original node is returned; so it's best to already get people into the habit of returning a NoChange marker in those cases, by disallowing this.
throw new Error(`Visitor returned original node, but this may not work reliably; if you intended to leave the node untouched, return a NoChange marker instead`);
} else {
debuggers[debuggerName](`Node of type '${typeOf(node)}' replaced by node of type '${typeOf(result)}'`);
if (iterations >= EVALUATION_LIMIT) {
throw new Error(`Exceeded evaluation limit in optimizer ${debuggerName}; aborting optimization. If you are a user of raqb, please report this as a bug. If you are writing an optimizer, make sure that your optimizer eventually stabilizes on a terminal condition (ie. NoChange)!`);
} else {
// New subtree to replace the old one; re-evaluate it, as new visitors might now be applicable.
return handleNode(result, iterations + 1);
}
}
} else {
throw new Error(`Visitor returned an unexpected type of return value: ${util.inspect(result)}`);
}
}
function applyVisitorFunction({ visitorName, func, node, permitDefer }) {
let { value: result, time } = measureTime(() => {
return func(node, {
// eslint-disable-next-line no-loop-func
setState: (name, value) => {
// FIXME: util.inspect is slow, and not necessary when debug mode is disabled
debuggers[visitorName](`Setting state for '${name}' from node of type '${typeOf(node)}': ${util.inspect(value, { colors: true })}`);
stateLog.push({ name, value });
},
registerStateHandler: (name, func) => handlers.add(name, func),
defer: (permitDefer === true) ? defer : null
});
});
timings[visitorName] += time;

return result;
}

if (nodeVisitors != null) {
for (let visitor of nodeVisitors) {
let handled = handleResult({
debuggerName: visitor.name,
result: applyVisitorFunction({
visitorName: visitor.name,
func: visitor.func,
node: node,
permitDefer: true
}),
permitDefer: true
});

if (handled != null) {
// Handling of the current node was aborted
return handled;
}
}
}

// FIXME: Eventually hardcode the available properties for different node types (and them being single/multiple), for improved performance?
// FIXME: Think carefully about whether there is *ever* a valid reason to remove a single node! As array items are already taken care of, and leave an empty array at worst, which can make sense. Possibly we even need to encode this data into node type metadata.
// FIXME: Possibly optimize the "node gets returned unchanged" case, somehow? Perhaps by propagating the NoChange marker? But object creation is fast, so that may actually make things slower than just blindly creating new objects...
// NOTE: We assume that if an array in an AST node property contains one AST node, *all* of its items are AST nodes. This should be ensured by the input wrapping in the operations API.
let childResult = handleNodeChildren(node, handleNode);
if (Object.keys(childResult.changedProperties).length > 0) {
let newNode = deriveNode(node, childResult.changedProperties);
// We already know that the new node is a different one, but let's just lead it through the same handleResult process, for consistency. Handling of the pre-child-changes node is aborted here, and we re-evaluate with the new node.
return handleResult({
debuggerName: "(subtree change)",
result: newNode,
permitDefer: false
});
}
if (childResult.stateLog.length > 0) {
let [ relevantState, otherState ] = splitFilter(childResult.stateLog, (entry) => handlers.has(entry.name));
stateLog = stateLog.concat(otherState);
for (let item of relevantState) {
// FIXME: Log these, and which visitor they originate from
handlers.call(item.name, item.value);
}
}
for (let defer of defers) {
let handled = handleResult({
debuggerName: `${defer.debuggerName} (deferred)`,
result: applyVisitorFunction({
visitorName: defer.debuggerName,
func: defer.func,
node: node,
permitDefer: false
}),
permitDefer: false
});
if (handled != null) {
// Handling of the current node was aborted
return handled;
}
}
return {
stateLog: stateLog,
node: node
};
}
let { value: rootResult, time } = measureTime(() => {
return handleNode(ast);
});

// let timeSpentInOptimizers = Object.values(timings).reduce((sum, n) => sum + n, 0n);
let timeSpentInOptimizers = Object.values(timings).reduce((sum, n) => sum + n, 0);

if (rootResult.node !== RemoveNode) {
return {
ast: rootResult.node,
timings: {
"# Total": time,
"# Walker overhead": time - timeSpentInOptimizers,

@ -0,0 +1,12 @@
"use strict";
module.exports = function createTimings(optimizers) {
let timings = {};
for (let optimizer of optimizers) {
// timings[optimizer.name] = 0n;
timings[optimizer.name] = 0;
}
return timings;
};

@ -1,5 +1,11 @@
"use strict";
module.exports = function concat(arrays) {
return arrays[0].concat(... arrays.slice(1));
if (arrays.length === 0) {
return [];
} else if (arrays.length === 1) {
return arrays[0];
} else {
return arrays[0].concat(... arrays.slice(1));
}
};
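
// Illustrative usage: concat([ [ 1, 2 ], [ 3 ], [ 4, 5 ] ]) yields [ 1, 2, 3, 4, 5 ];
// concat([ [ 1, 2 ] ]) returns the single input array as-is, and concat([]) yields a new empty array.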

@ -0,0 +1,8 @@
"use strict";
const merge = require("./merge");
const node = require("./ast-node");
module.exports = function deriveNode(source, newProperties) {
return node(merge(source, newProperties));
};
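
// Illustrative (assuming `merge` copies rather than mutates): deriveNode(selectNode, { clauses: newClauses })
// yields a fresh AST node that keeps the remaining properties of `selectNode` but swaps in the new clauses.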

@ -0,0 +1,11 @@
"use strict";
module.exports = function findLastIndex(array, predicate) {
for (let i = array.length - 1; i >= 0; i--) {
if (predicate(array[i])) {
return i;
}
}
return -1;
};
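
// Illustrative usage: findLastIndex([ 1, 3, 2, 4 ], (n) => n < 3) returns 2 (the index of the
// last matching item); it returns -1 when nothing matches.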

@ -15,7 +15,7 @@ module.exports = function (operations) {
return function alias(_name, _expression) {
let [ name, expression ] = validateArguments(arguments, {
name: [ required, either([
[ isObjectType("column") ],
[ isObjectType("columnName") ],
[ isString, wrapWithOperation("column") ]
])],
expression: [ required, isValueExpression ]

@ -0,0 +1,28 @@
"use strict";
const { validateArguments } = require("@validatem/core");
const required = require("@validatem/required");
const arrayOf = require("@validatem/array-of");
const node = require("../ast-node");
module.exports = function (operations) {
const isCollapsibleColumn = require("../validators/operations/is-collapsible-column")(operations);
const isCollapseByClause = require("../validators/operations/is-collapse-by-clause")(operations);
return function collapseBy(_columns) {
let [ columns, clauses ] = validateArguments(arguments, {
columns: [ required, arrayOf([ required, isCollapsibleColumn ]) ],
clauses: [ arrayOf([ isCollapseByClause ]) ]
});
return node({
type: "collapseBy",
columns: node({
type: "collapseByColumns",
columns: columns
}),
clauses: clauses
});
};
};
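
// Illustrative (with a hypothetical someColumnNode): collapseBy([ someColumnNode ]) produces a
// collapseBy node whose `columns` property is a collapseByColumns node wrapping the given columns.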

@ -0,0 +1,47 @@
"use strict";
const { validateArguments } = require("@validatem/core");
const either = require("@validatem/either");
const required = require("@validatem/required");
const anyProperty = require("@validatem/any-property");
const isString = require("@validatem/is-string");
const unreachable = require("../unreachable");
const node = require("../ast-node");
const tagAsType = require("../validators/tag-as-type");
module.exports = function (operations) {
const isComputable = require("../validators/operations/is-computable")(operations);
return function compute(_items) {
let [ items ] = validateArguments(arguments, {
// FIXME: Add support for scalar compute
// items: [ required, either([
// [ isComputable, tagAsType("single") ],
// [ anyProperty({
// key: [ required, isString ],
// value: [ required, isComputable ]
// }), tagAsType("multiple") ]
// ]) ]
items: [ required, anyProperty({
key: [ required, isString ],
value: [ required, isComputable ]
}), tagAsType("multiple") ]
});
if (items.type === "multiple") {
return node({
type: "computeMultiple",
items: Object.entries(items.value).map(([ key, value ]) => {
return node({
type: "compute",
column: operations.column(key),
expression: value
});
})
});
} else {
unreachable(`Invalid tagged type '${items.type}'`);
}
};
};

@ -0,0 +1,27 @@
"use strict";
const { validateArguments } = require("@validatem/core");
const required = require("@validatem/required");
const node = require("../ast-node");
module.exports = function count(operations) {
const isTable = require("../validators/operations/is-table")(operations);
return function count(_table) {
let [ table ] = validateArguments(arguments, {
table: [ isTable ]
});
// TODO: Investigate whether this can be made more performant by counting a specific column rather than *. That would probably require stateful knowledge of the database schema, though.
let columnReference = (table != null)
? operations.foreignColumn({ table: table, column: "*" }) // FIXME: Make sure not to break this (internally) when making column name checks more strict
: operations.column("*");
return node({
type: "aggregrateFunction",
functionName: "count",
args: [ columnReference ]
});
};
};
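
// Illustrative: count() produces an aggregrateFunction node that counts column("*"), while
// count(someTableNode) counts via a foreignColumn reference on that table instead.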

@ -0,0 +1,22 @@
"use strict";
const { validateArguments } = require("@validatem/core");
const required = require("@validatem/required");
const arrayOf = require("@validatem/array-of");
const node = require("../ast-node");
module.exports = function (operations) {
const isPossiblyForeignColumn = require("../validators/operations/is-possibly-foreign-column")(operations);
return function hierarchical(_columns) {
let [ columns ] = validateArguments(arguments, {
columns: [ required, arrayOf([ required, isPossiblyForeignColumn ]) ] // FIXME: Require minimum 2, probably
});
return node({
type: "hierarchical",
columns: columns
});
};
};

@ -136,25 +136,49 @@ let operations = {
};
let operationModules = {
// Base operations
select: require("./select"),
// Column selection
addColumns: require("./add-columns"),
onlyColumns: require("./only-columns"),
alias: require("./alias"),
// Reference/scalar types
column: require("./column"),
foreignColumn: require("./column"),
table: require("./table"),
value: require("./value"),
// Filtering
where: require("./where"),
expression: require("./expression"),
// Predicate lists/combinators
allOf: require("./all-of"),
anyOf: require("./any-of"),
column: require("./column"),
// Conditions
equals: require("./equals"),
expression: require("./expression"),
foreignColumn: require("./column"),
lessThan: require("./less-than"),
moreThan: require("./more-than"),
not: require("./not"),
onlyColumns: require("./only-columns"),
// Collapsing/grouping
collapseBy: require("./collapse-by"),
hierarchical: require("./hierarchical"),
// Computation
compute: require("./compute"),
// Aggregrate functions
count: require("./count"),
sum: require("./sum"),
// Misc.
parameter: require("./parameter"),
postProcess: require("./post-process"),
select: require("./select"),
table: require("./table"),
unsafeSQL: require("./unsafe-sql"),
value: require("./value"),
where: require("./where"),
};
Object.assign(module.exports, operations);

@ -0,0 +1,22 @@
"use strict";
const { validateArguments } = require("@validatem/core");
const required = require("@validatem/required");
const node = require("../ast-node");
module.exports = function (operations) {
const isPossiblyForeignColumn = require("../validators/operations/is-possibly-foreign-column")(operations);
return function sum(_column) {
let [ column ] = validateArguments(arguments, {
column: [ required, isPossiblyForeignColumn ]
});
return node({
type: "aggregrateFunction",
functionName: "sum",
args: [ column ]
});
};
};

@ -6,4 +6,6 @@ module.exports = [
require("./flatten-not-predicates"),
require("./flatten-predicate-lists"),
require("./arrayify-predicate-lists"),
require("./set-collapse-by-columns"),
// require("./test-context"),
];

@ -0,0 +1,157 @@
"use strict";
const NoChange = require("./util/no-change");
const deriveNode = require("../derive-node");
const operations = require("../operations");
const typeOf = require("../type-of");
const unreachable = require("../unreachable");
const concat = require("../concat");
const uniqueByPredicate = require("../unique-by-predicate");
// FIXME: Support for foreign column names
function uniqueColumns(columns) {
return uniqueByPredicate(columns, (column) => column.name);
}
/*
valid columns when collapsing:
- columns that appear in the collapseBy column list, whether or not they are inside a hierarchical wrapper
- any column that is wrapped in an aggregrate function of some sort
*/
module.exports = {
name: "set-collapse-by-columns",
category: [ "normalization" ],
visitors: {
collapseBy: (node, { setState }) => {
setState("isCollapsing", true);
return NoChange;
},
columnName: (node, { setState }) => {
setState("columnSeen", node);
return NoChange;
},
// FIXME: Think of a generic way to express "only match columns under this specific child property"
collapseByColumns: (node, { registerStateHandler, setState, defer }) => {
let columns = [];
registerStateHandler("columnSeen", (node) => {
columns.push(node);
});
return defer(() => {
setState("setCollapsedColumns", columns);
return NoChange;
});
},
addColumns: (node, { setState }) => {
setState("setAddColumns", node.columns);
return NoChange;
},
onlyColumns: (node, { setState }) => {
setState("setOnlyColumns", node.columns);
return NoChange;
},
aggregrateFunction: (node, { registerStateHandler }) => {
// FIXME: Also report isCollapsing here, due to aggregrate function use, but make sure that the error describes this as the (possible) cause
return NoChange;
},
compute: (node, { setState }) => {
setState("computeSeen", node);
return NoChange;
},
select: (node, { registerStateHandler, defer }) => {
let isCollapsing;
let onlyColumns = [];
let addColumns = [];
let computes = [];
let collapsedColumns;
registerStateHandler("isCollapsing", (value) => {
isCollapsing = isCollapsing || value;
});
registerStateHandler("setCollapsedColumns", (columns) => {
if (collapsedColumns == null) {
collapsedColumns = columns;
} else {
throw new Error(`You can currently only specify a single 'collapseBy' clause. Please file an issue if you have a reason to need more than one!`);
}
});
registerStateHandler("setOnlyColumns", (columns) => {
onlyColumns = onlyColumns.concat(columns);
});
registerStateHandler("setAddColumns", (columns) => {
addColumns = addColumns.concat(columns);
});
registerStateHandler("computeSeen", (node) => {
computes.push(node);
});
return defer(() => {
if (isCollapsing) {
if (addColumns.length > 0) {
let extraColumnNames = addColumns.map((column) => column.name);
throw new Error(`You tried to add extra columns (${extraColumnNames.join(", ")}) in your query, but this is not possible when using collapseBy. See [FIXME: link] for more information, and how to solve this.`);
} else if (onlyColumns.length > 0) {
// NOTE: This can happen either because the user specified an onlyColumns clause, *or* because a previous run of this optimizer did so!
let uniqueSelectedColumns = uniqueColumns(onlyColumns);
let collapsedColumnNames = collapsedColumns.map((column) => column.name);
let invalidColumnSelection = uniqueSelectedColumns.filter((node) => {
let isAggregrateComputation = typeOf(node) === "alias" && typeOf(node.expression) === "aggregrateFunction";
let isCollapsedColumn = typeOf(node) === "columnName" && collapsedColumnNames.includes(node.name);
let isValid = isAggregrateComputation || isCollapsedColumn;
return !isValid;
});
// FIXME: We can probably optimize this by marking the optimizer-created onlyColumns as inherently-valid, via some sort of node metadata mechanism
if (invalidColumnSelection.length > 0) {
let invalidColumnNames = invalidColumnSelection.map((column) => {
let columnType = typeOf(column);
if (columnType === "columnName") {
return column.name;
} else if (columnType === "alias") {
// FIXME: Show alias target instead of column name here?
return column.column.name;
} else {
return unreachable(`Encountered '${columnType}' node in invalid columns`);
}
});
throw new Error(`You tried to include one or more columns in your query (${invalidColumnNames.join(", ")}), that are not used in a collapseBy clause or aggregrate function. See [FIXME: link] for more information.`);
} else {
return NoChange;
}
} else {
let computeAliases = computes.map((node) => {
return operations.alias(node.column, node.expression);
});
return deriveNode(node, {
clauses: node.clauses.concat([
operations.onlyColumns(concat([
collapsedColumns,
computeAliases
]))
])
});
}
} else {
// Not collapsing; nothing to verify or rewrite here.
return NoChange;
}
});
},
}
};
// FIXME: a ConsumeNode marker, like RemoveNode but it does not invalidate that node's state... may need to actually make it a reference, so that a parent node can decide whether to consume that node. Basically passing a "consume this node" function as a state value, that correctly internally triggers the optimizer infrastructure to change the tree as a result.
// FIXME: Consume the compute nodes, and have an optimizer that removes empty computeMultiple nodes

@ -0,0 +1,85 @@
"use strict";
const NoChange = require("./util/no-change");
const deriveNode = require("../derive-node");
const operations = require("../operations");
module.exports = {
name: "test-context",
category: [ "testing" ],
visitors: {
columnName: (node, { setState }) => {
setState("seenColumn", node.name);
return NoChange;
},
select: (node, { registerStateHandler, defer }) => {
let seenColumns = new Set();
registerStateHandler("seenColumnsInWhere", (names) => {
for (let name of names) {
seenColumns = seenColumns.add(name);
}
});
// FIXME: Definitely need better AST modification/derivation tools... probably some sort of deep-modifying utility, for starters. Maybe merge-by-template can be of use here? With a custom AST node merger? It probably doesn't support non-enumerable properties correctly right now, though...
return defer(() => {
console.log("Seen columns in WHERE in SELECT:", seenColumns);
let onlyColumnsClause = node.clauses.find((clause) => clause.type === "onlyColumns");
let columnsAlreadyAdded = onlyColumnsClause != null && Array.from(seenColumns).every((column) => {
return onlyColumnsClause.columns.some((existingColumn) => existingColumn.name === column);
});
if (!columnsAlreadyAdded) {
// NOTE: This is a good test case for optimizer stability! Just returning a derived node in every case.
let newOnlyColumnsClause = (onlyColumnsClause == null)
? operations.onlyColumns(Array.from(seenColumns))
: deriveNode(onlyColumnsClause, {
columns: onlyColumnsClause.columns.concat(Array.from(seenColumns).map((columnName) => {
return operations.column(columnName);
}))
});
return deriveNode(node, {
clauses: node.clauses
.filter((clause) => clause.type !== "onlyColumns")
.concat([ newOnlyColumnsClause ])
});
} else {
return NoChange;
}
});
},
where: (node, { registerStateHandler, defer, setState }) => {
let seenColumns = [];
registerStateHandler("seenColumn", (name) => seenColumns.push(name));
return defer(() => {
setState("seenColumnsInWhere", seenColumns);
return NoChange;
});
// let seenColumns = [];
// let id = Math.random();
// registerStateHandler("seenColumn", (name) => {
// seenColumns.push(name);
// });
// console.log("Scheduling defer", id);
// return defer(() => {
// console.log("Defer called", id);
// // MARKER: This gets called twice, but should only be called once!
// // console.log("Seen columns in WHERE:", seenColumns, require("util").inspect(node, {colors:true,depth:null}));
// console.log("Seen columns in WHERE:", seenColumns);
// return NoChange;
// });
}
}
};

@ -0,0 +1,15 @@
"use strict";
module.exports = function uniqueByPredicate(items, predicate) {
let seen = new Set();
return items.filter((item) => {
let key = predicate(item);
if (seen.has(key)) {
return false;
} else {
seen.add(key);
return true;
}
});
};
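
// Illustrative usage: uniqueByPredicate([ { name: "a" }, { name: "b" }, { name: "a" } ], (item) => item.name)
// keeps only the first item for each distinct name.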

@ -0,0 +1,7 @@
"use strict";
module.exports = function (operations) {
const isObjectType = require("./is-object-type")(operations);
return isObjectType("aggregrateFunction");
};

@ -0,0 +1,14 @@
"use strict";
const either = require("@validatem/either");
module.exports = function (operations) {
const isObjectType = require("./is-object-type")(operations);
const isComputeClause = require("./is-compute-clause")(operations);
return either([
[ isObjectType("where") ],
[ isComputeClause ],
// [ isObjectType("withRelations") ], // FIXME: Implement support for this
]);
};

@ -0,0 +1,13 @@
"use strict";
const either = require("@validatem/either");
module.exports = function (operations) {
const isObjectType = require("./is-object-type")(operations);
const isPossiblyForeignColumn = require("./is-possibly-foreign-column")(operations);
return either([
isPossiblyForeignColumn,
isObjectType("hierarchical")
]);
};

@ -0,0 +1,16 @@
"use strict";
const either = require("@validatem/either");
module.exports = function (operations) {
const isObjectType = require("./is-object-type")(operations);
const isPossiblyForeignColumn = require("./is-possibly-foreign-column")(operations);
return either([
isObjectType("sqlExpression"),
isObjectType("aggregrateFunction"),
isObjectType("valueFrom"),
isObjectType("literalValue"),
isPossiblyForeignColumn
]);
};

@ -0,0 +1,12 @@
"use strict";
const either = require("@validatem/either");
module.exports = function (operations) {
const isObjectType = require("./is-object-type")(operations);
return either([
isObjectType("computeMultiple"),
isObjectType("computeSingle"),
]);
};

@ -4,12 +4,15 @@ const either = require("@validatem/either");
module.exports = function (operations) {
const isObjectType = require("./is-object-type")(operations);
const isComputeClause = require("./is-compute-clause")(operations);
return either([
[ isObjectType("where") ],
[ isComputeClause ],
[ isObjectType("addColumns") ],
[ isObjectType("onlyColumns") ],
[ isObjectType("withRelations") ],
[ isObjectType("postProcess") ],
[ isObjectType("collapseBy") ],
]);
};

@ -8,6 +8,6 @@ module.exports = function (operations) {
return either([
isObjectType("alias"),
isPossiblyForeignColumn,
isPossiblyForeignColumn, // FIXME: Think about whether we actually want to permit foreign columns here, without an alias.
]);
};

@ -0,0 +1,15 @@
"use strict";
const wrapError = require("@validatem/wrap-error");
const either = require("@validatem/either");
const isString = require("@validatem/is-string");
module.exports = function (operations) {
const isObjectType = require("./is-object-type")(operations);
const wrapWithOperation = require("./wrap-with-operation")(operations);
return wrapError("Must be a table name or object", either([
[ isObjectType("tableName") ],
[ isString, wrapWithOperation("table") ]
]));
};

@ -11,12 +11,14 @@ const isLiteralValue = require("../is-literal-value");
module.exports = function (operations) {
const isObjectType = require("./is-object-type")(operations);
const isColumnObject = require("./is-column-object")(operations);
const isAggregrateFunction = require("./is-aggregrate-function")(operations);
const wrapWithOperation = require("./wrap-with-operation")(operations);
return wrapError("Must be a type of value", either([
[ isObjectType("sqlExpression") ],
[ isObjectType("literalValue") ],
[ isObjectType("placeholder") ], // TODO: Verify that this also works for the `alias` method
[ isAggregrateFunction ],
[ isColumnObject ],
[ isLiteralValue, wrapWithOperation("value") ]
]));

@ -4422,6 +4422,11 @@ pem@^1.13.2:
os-tmpdir "^1.0.1"
which "^2.0.2"
pick-random-weighted@^1.2.3:
version "1.2.3"
resolved "https://registry.yarnpkg.com/pick-random-weighted/-/pick-random-weighted-1.2.3.tgz#3d337543ff59b53c7aad17aa97560981a4ac0311"
integrity sha512-ndsh+IVYLj5TCLOs1R8N0/8F9npfSNORpmArX0ahw4GFzZ2rpM9Orkbkwp+q16QAE7QDaLWn+4EsEnxI7VMDJA==
picomatch@^2.0.4, picomatch@^2.2.1:
version "2.2.2"
resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.2.2.tgz#21f333e9b6b8eaff02468f5146ea406d345f4dad"
