You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

498 lines
19 KiB
JavaScript

/* eslint-disable no-undef */
"use strict";
// JOIN
query = combine([
select("nodes", [
where({ id: parameter("id") })
]),
select("node_revisions", [
where({ node_id: foreignColumn("nodes.id") })
])
]);
// UNION
let whereClause = { id: parameter("id") };
query = concatenate([
select("old_nodes", [ whereClause ]),
select("new_nodes", [ whereClause ]),
]);
// GROUP BY / aggregrates
query = select("weather_reports", [
where({ city: startsWith("S") }),
collapseBy("city", [
compute({ maximum_temperature: highestOf("temperature") }),
where({ maximum_temperature: lessThan(40) })
])
]);
query = select("reviews", [
where({ rating: moreThan(3) }),
collapseBy([ "movie_source", "movie_id" ], [
compute({ positive_review_count: count() })
])
]);
// movies: data_source, data_id, title, PRIMARY(data_source, data_id)
// reviews: id, movie_source, movie_id, rating, FOREIGN(movie_source, movie_id) -> movies .data_source, .data_id
// intended output: [ title, count ] for each movie whose title starts with an A
// NOTE: Conceptually, a relation is a 'virtual table' that references all the associated foreign rows for a given local row
// For something like the below, actually return the `movie.title` field as a column named exactly that by default; using quotes to be able to include the dot. Nevertheless the user might want to *rename* certain columns in a constrained query like a collapseBy query, where the columns are normally automatically determined based on the specified computations and grouping conditions; neither addColumns nor onlyColumns make semantic sense for that (the latter should be reserved for removing certain columns from the results, and *should* work with collapseBy, just with constrained valid column names!), so we probably need a renameColumn construct that gets rid of the original `movie.title` column and eg. replaces it with `movie_title`.
// FIXME: Think carefully about how the semantics of belongsTo would change when used in defineAs rather than withRelations. Ideally the semantics shouldn't change at all. However, if it's also possible for someone to specify a `has` relation (which points at *multiple* foreign rows), then we need a way to deal with zero-or-more rows here anyway, maybe that can be the same semantics as in withRelations?
// I guess that "there must be at least one foreign row that matches the specified constraints" is a reasonable criterium for the local row to be includer or not? Then there should probably be a LIMIT 1 on the foreign query, to ensure that it only ever uses the values of whatever the first-encountered row is. This means there would be undefined behaviour when referring variable fields, though.
// NOTE: Make sure that any design around this is also capable of dealing with composite keys! Maybe require a composite(...) wrapper for those, to distinguish from other array-ish things...
// Definitely need this to distinguish from GROUPING SETS in GROUP BY (which is also an array); otherwise ["brand", "size"] would be ambiguous.
/*
SELECT
reviews.movie_id,
COUNT(reviews.*) AS positive_review_count
FROM reviews
JOIN
movies ON reviews.movie_id = movies.id
WHERE
reviews.rating > 3
GROUP BY reviews.movie_id;
*/
query = select("reviews", [
define({ movie: belongsTo("movie_id") }),
where({
rating: moreThan(3),
movie: { title: includes("Movie") }
}),
collapseBy("movie_id", [
compute({
positive_review_count: count(),
title: "movie.title"
}),
])
]);
// FIXME: What if we were to collapse by ID and then want to also get the movie title?
// reviews: id, movie_id, rating
// movies: id, title, release_year
query = select("movies", [
define({ reviews: has("reviews.movie_id") }),
where({
title: startsWith("A"),
reviews: { rating: moreThan(3) }
}),
collapseBy("release_year", [
compute({ positive_review_count: count("reviews") }), // FIXME: Ambiguity between table and column names here, maybe just default to table? Since column counts are rare
])
]);
// The below is functionally identical to the above; it is generally permitted to specify WHERE constraints for `define`d relations in the top-level WHERE
query = select("movies", [
define({ reviews: has("reviews.movie_id", [ where({ rating: moreThan(3) }) ]) }),
where({ title: contains("Movie") }),
collapseBy("release_year", [
compute({ positive_review_count: count("reviews") }), // FIXME: Ambiguity between table and column names here, maybe just default to table? Since column counts are rare
])
]);
// reviews: id, movie_id, rating
// movies: id, title, release_year
// SELECT movies.release_year, COUNT(reviews.*) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3 AND movies.title LIKE '%Movie%' GROUP BY movies.release_year;
// Differences from query below:
// 1) GROUP BY movies.release_year instead of primary key,
// 2) SELECT movies.release_year instead of movies.*, as per collapseBy semantics
// 3) Additional WHERE clause for movies.title
query = select("movies", [
where({ title: contains("Movie") }),
collapseBy("release_year", [
compute({
positive_review_count: count(has("reviews.movie_id", [
{ rating: moreThan(3) }
]))
}),
])
]);
/*
SELECT
movies.*,
COUNT(reviews.*) AS positive_review_count
FROM movies
JOIN
reviews ON movies.id = reviews.movie_id
WHERE reviews.rating > 3
GROUP BY movies.id;
*/
// NOTE: The GROUP BY is necessary because otherwise COUNT (which is an aggregrate function) cannot be used; the field name to use there, however, cannot be statelessly determined as it requires information about the primary key of the table
// SELECT movies.*, COUNT(reviews.*) OVER (PARTITION BY reviews.movie_id) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3;
// ^ Produces the correct data, but duplicates rows; window functions may be useful for combining aggregrated information and direct rows, though
// SELECT DISTINCT movies.*, COUNT(reviews.*) OVER (PARTITION BY reviews.movie_id) AS positive_review_count FROM movies JOIN reviews ON movies.id = reviews.movie_id WHERE reviews.rating > 3;
// ^ Works! But may be slower? As it has to actually compare the resultset data, as opposed to eliminating rows by their `id`
query = select("movies", [
compute({ // better name for withDerived?
positive_review_count: count(has("reviews.movie_id", [
{ rating: moreThan(3) }
]))
})
]);
// subqueries and VALUES (virtual tables)
// FIXME: subqueries can result in scalar values, somehow, in some cases? Need to look into the details of this
query = select("films", [
defineAs("archived_rental_rates", select("films", [
compute({ average: averageOf("rental_rate") }),
where({ archived: true })
])),
// Alternatively?
defineAs("rental_rates.average", averageOf("rental_rate")),
where({ rental_rate: moreThan(foreignColumn("archived_rental_rates.average")) })
]);
let virtualCustomersTable = virtualTable([{
first_name: "anne",
last_name: "smith",
age: 42
}, {
first_name: "bob",
last_name: "jones",
age: 28
}, {
first_name: "joe",
last_name: "blow",
age: 49
}]);
query = select(virtualCustomersTable, [
where({ first_name: "anne" })
]);
query = select("customers", [
defineAs("target_customers", virtualCustomersTable),
// withQueryAs("average_age", select("target_customers", [
// compute({ average: averageOf("age") })
// ])),
// NOTE: The below would need to be translated into an aggregrate subquery that is then referenced from the parent query; this case is identified by an aggregrate function being called with a *foreign* column name. For this, it may need to additionally insert data into the AST that signals to an optimizer that a subquery needs to be produced somewhere further up the AST
// FIXME: Ambiguity between "foreign column in a JOIN" and "foreign column referring to a virtual subquery table"; especially in the case where combine and defineAs are both used, and the scoping in the raqb might differ from that in the SQL query? Though it probably isn't allowed to redefine names like that anyway?
where({ age: moreThan(averageOf("target_customers.age")) })
]);
query = select("fdt", [
where({
c1: anyOf(listFrom(select("t2", [
onlyColums([ "c3" ]),
where({ c2: add([
foreignColumn("fdt.c1"),
10
]) })
])))
})
]);
query = select("table1", [
where({
column1: anyOf(listFrom("table2", "column3"), [
where({ column2: add([ 10, foreignColumn("table1.column1") ]) })
])
})
]);
// https://docs.actian.com/actianx/11.1/index.html#page/SQLRef/Scalar_Subqueries.htm
// NOTE: `compute` assumes the value side to implicitly be a possiblyForeignColumnName. Literal values must be explicit.
// NOTE: There is a second form of `compute` that only produces a single, scalar value.
let highestSalary = valueFrom("employees", compute(highestOf("salary")));
let averageCommission = valueFrom("commissions", compute(averageOf("bonus")));
query = select("employees", [
define("department", belongsTo("department_id")),
where({ department: { name: "finance" } }),
compute({
employee_name: "emp_name",
department_name: "department.name",
average_commission: averageCommission,
highest_salary: highestSalary
})
]);
// More complex version of the above, comparing only salaries in Finance
let financeEmployees = select("employees", [
define("department", belongsTo("department_id")),
where({ department: { name: "finance" } })
]);
query = select("employees", [
define("department", belongsTo("department_id")),
define("commissions", select("commissions")),
define("finance_employees", financeEmployees),
where({ department: { name: "finance" } }),
compute({
employee_name: "emp_name",
department_name: "department.name",
average_commission: averageOf("commissions.bonus"),
average_salary: averageOf("finance_employees.salary"),
highest_salary: highestOf("finance_employees.salary")
})
]);
query = select("sales", [
collapseBy([ "product_id", hierarchical([ "country_id", "city_id", "store_id" ]) ], [
compute({
total_sold: count(),
total_revenue: sum("revenue")
})
])
]);
// Or, when the data is in normalized form:
query = select("sales", [
define({
country: belongsTo("city.country_id"),
city: belongsTo("store.city_id"),
store: belongsTo("store_id")
}),
collapseBy([ "product_id", hierarchical([ "country.id", "city.id", "store_id" ]) ], [
compute({
total_sold: count(),
total_revenue: sum("revenue")
})
])
]);
// Or, inverted:
query = select("countries", [
define({
city: has("cities.country_id"),
store: has("stores.city_id"),
sales: has("sales.store_id")
}),
collapseBy([ "sales.product_id", hierarchical([ "id", "city.id", "store_id" ]) ], [
compute({
total_sold: count("sales"),
total_revenue: sum("sales.revenue")
})
])
]);
// Multiple collapseBy clauses:
// SELECT color, size, (CASE WHEN size IS NOT NULL THEN SUM(price) ELSE COUNT(*) END) AS total_sales FROM sales GROUP BY GROUPING SETS ( color, (color, size) );
// This introduces a lot of query analysis complexity, let's not implement it unless someone actually needs it...
query = select("sales", [
collapseBy("color", [
compute({ total_sales: count() })
]),
collapseBy(permutationsOf([ "color", "size" ]), [
compute({ total_sales: sum("price") })
])
]);
// FIXME: count
// FIXME: Special case, handling clauses that cannot exist within an ON, eg. ORDER BY
// FIXME: Test duplicate fields, in different WHEREs, eg. number_three
// NOTE: Need to use table aliases for self-combines/self-relations
// FIXME: cartesianProduct(...) wrapper for combine. in addition to anyOf/allOf
// FIXME: startsWith -> LIKE FOO%, endsWith -> LIKE %FOO, includes -> LIKE %FOO% -- figure out input escaping here!
// FIXME: does LIKE work with ANY/ALL?
// FIXME: Performance-class optimizer that moves non-aggregrate conditions out of the collapseBy conditions and into the parent select
// FIXME: Document that collapseBy will override the selected columns, and disallow the usage of addColumns/onlyColumns (and why that is)
// FIXME: Actually implement that limitation, too!
// FIXME: Is there an 'array' aggregrate function for when someone wants all of the unique items within a group, or something like that?
// FIXME: optionalArrayOf combinator, and permit omitting the array in all cases where an array is expected and an array of 1 item would be valid
// In practice this means permitting omitting the array (and implicitly wrapping it) anywhere, since it will then hit the regular item count check anyway.
// FIXME: Disallow the use of both compute and collapseBy wthin a `select`; require the compute to be inside the collapseBy, in that case
// FIXME: Table functions in place of a table name
// FIXME: "create function" and "create view" interfaces
// NOTE: Ensure that no matter where column references are provided, local column references *always* refer to the currently-containing context
// Need to go through all the possible combinations of constructs, and verify that this holds true, also for eg. sub-queries
// NOTE: Sometimes subqueries should be defined using WITH rather than inline or in a FROM, eg. if they do not appear in the output through a JOIN
// FIXME: IN ... clauses can accept subqueries, is this also true for ANY/ALL?
// NOTE: Subqueries in WHERE clauses can reference the outer query (lexical scoping), ie. the item being evaluated! Make sure that this is represented well in the API design.
// FIXME: Have an 'optimizer' (new 'correctness' category?) that verifies that all foreign column references are semantically valid inside of a given query? Are there cases where some sort of predefined table can be accessed without defining it in the query? If yes, it should probably be required to define this upfront in raqb queries.
// FIXME: Figure out how ordering relates to JOINs
// FIXME: JSON queries, via jsonQuery({ ... }) wrapper or so
// FIXME: Implement nested WHERE for foreign constraints
// FIXME: Bulk inserts! Should make sure that VALUES (which is probably needed for this) does not require a parameter for every single individual value
// FIXME: Figure out a solution for update/delete with LIMIT
// FIXME: initializeWith for column additions in schema (create with nullable -> update -> set non-nullable)
// This is different from defaultTo(...), which continues to be applicable *after* creating the column
// FIXME: Non-parameterizable placeholder representation, for dealing with things like currently-unknown primary keys, that will only be known upon query execution (as the executor knows the table structure)
// FIXME: Replace aliases with `compute`
query = createTable("films", {
fields: {
id: primaryKey(),
title: text(),
description: [ text(), canBeNull() ],
},
indexes: [
compositeIndex([ "data_source", "data_source_id" ])
]
});
combine(/* [ clause ], allOf([ clause ]), anyOf([ clause]) */);
// let niceNumbers = anyOf([ 1, 2, 3 ]);
// query = select("projects", [
// where({
// number_one: niceNumbers,
// number_two: niceNumbers
// }),
// where({
// number_three: anyOf([ 42, column("number_one") ]),
// number_four: 1337
// })
// ]);
// query = select("projects", [
// where({
// // foo: anyOf([ "bar", not(not("baz")), anyOf([ "bar2", "baz2" ]), unsafeSQL("TRUE") ]),
// // qux: anyOf([ 13, moreThan(42) ]),
// complex: anyOf([
// 30,
// 40,
// allOf([
// moreThan(100),
// lessThan(200),
// lessThan(parameter("max"))
// ])
// ])
// }),
// // where({ second: 2 }),
// // where(not({
// // thirdA: 3,
// // thirdB: 3
// // })),
// // where(anyOf([ { foo: "bar" } ]))
// ]);
// query = select("projects", [
// onlyColumns([
// "foo",
// alias("bar", 42),
// alias("baz", sql("foo"))
// ]),
// where(anyOf([
// { foo: "bar", qux: anyOf([ "quz", "quy" ]) },
// { baz: lessThan(42) }
// ]))
// ]);
/* {
query: 'SELECT foo, ? AS bar, foo AS baz FROM projects WHERE foo = ? OR baz < ?;',
params: [ 42, 'bar', 42 ],
placeholders: []
} */
// query = select("projects", [
// onlyColumns([ "id", "name" ]),
// where({
// active: true,
// visible: true,
// // primary_category_id: anyOf([ 2, 3, 5, 7, 8 ])
// // primary_category_id: anyOf(parameter("categoryIDs"))
// primary_category_id: not(anyOf(parameter("categoryIDs"))) // FIXME/MARKER: This gets stringified wrong!
// }),
// // FIXME: where pivot table entry exists for category in that list
// withRelations({
// primaryCategory: belongsTo("primary_category_id", { query: [ withOwner() ] }),
// categories: through([
// has("projects_categories.project_id", { query: [
// // Optional extra clauses for the query on the pivot table, eg. for filtering entries
// where({ adminApproved: true })
// ]}),
// "category_id"
// ]),
// // all user groups for a given project ID -> all memberships for the given user group IDs -> for each membership, the record referenced by the given user_id
// users: through([ "user_groups.project_id", "membership.user_group_id", "user_id" ]),
// // ... expands to ...
// // users: through([
// // has({ column: foreignColumn({ table: "user_groups", column: "project_id" }) }),
// // has({ column: foreignColumn({ table: "memberships", column: "user_group_id" }) }),
// // belongsTo({ column: column("user_id") }),
// // ]),
// owner: "owner_id",
// // ... expands to
// // owner: belongsTo({ column: "owner_id" }),
// releases: "releases.project_id",
// // ... expands to ...
// // releases: has({ column: "releases.project_id" })
// }),
// withDerived({
// capitalized_name: sql("UPPER(name)"),
// team_count: sql("moderator_count + admin_count"),
// // fourty_two: value(42), // This makes no sense in withDerived!
// name_distance: (project) => wordDistanceAlgorithm(project.name, "someReferenceName") // NOTE: This could have returned a Promise!
// }),
// mapCase({ from: "snake", to: "camel" })
// ]);
query = select("bandwidth_measurements", [
collapseBy("customer_id", [
compute({ usage: nthPercentile(95, "usage") })
])
]);
SELECT expirations.ban_id, max(expire_id) FROM expirations
WHERE expire < ? AND removed_at IS NULL
GROUP BY expirations.ban_id
query = select("expirations", [
where({
expire: lessThan(currentTime()),
removedAt: isNull()
}),
collapseBy("ban_id", [
compute({ latest_expiry_id: max("expire_id") })
])
]);
let waypointsForTrip = select("waypoints", [
onlyColumns([ "position" ]),
compute({
distance: postgis.distanceBetween(
column("position"),
fromPreviousRow("position", sortedBy("generated_at"))
)
}),
where({
user_id: foreignColumn("trips.user_id"),
generated_at: between(column("trips.started_at"), column("trips.ended_at"))
}),
sortedBy("generated_at")
]);
select("trips", [
define({
user: belongsTo("user_id"),
waypoints: waypointsForTrip,
computed_trip: select("waypoints", [
compute({
distance: sum("distance"),
route: postgis.makeLine("waypoints")
})
]),
}),
where({ user: { team_id: 42 } }),
addColumns([ "user.*" ]),
compute({
average_distance_per_ms: divide("computed_trip.distance", subtract("ended_at", "started_at")),
route_as_geojson: postgis.asGeoJSON("computed_trip.route"),
route_as_polyline: postgis.asEncodedPolyline("computed_trip.route"),
distance: "computed_trip.distance",
route: "computed_trip.route"
}),
sortedBy(descending("ended_at"))
])