Rewrite queue refill query to be much faster, fix duplicate tags

This commit is contained in:
Sven Slootweg 2022-11-26 02:08:24 +01:00
parent dbd15aa1d7
commit 59b89a3459
7 changed files with 4153 additions and 103 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
node_modules node_modules
junk junk
.clinic

View file

@ -0,0 +1,39 @@
"use strict";
module.exports.up = function(knex, Promise) {
// Get rid of existing duplicate entries
return knex.raw(`
DELETE FROM srap_tags
WHERE id IN (
SELECT
id
FROM (
SELECT
id,
row_number() OVER w as rnum
FROM srap_tags
WINDOW w AS (
PARTITION BY name, item_id
ORDER BY id
)
) t
WHERE t.rnum > 1);
`).then(() => {
return knex.schema
.alterTable("srap_tags", (table) => {
table.dropPrimary();
table.dropIndex("name");
table.dropColumn("id");
table.primary([ "name", "item_id" ]);
});
});
};
module.exports.down = function(knex, Promise) {
return knex.schema
.alterTable("srap_tags", (table) => {
table.dropPrimary();
table.bigIncrements("id").primary();
});
};

View file

@ -61,7 +61,8 @@
}, },
"devDependencies": { "devDependencies": {
"@joepie91/eslint-config": "^1.1.0", "@joepie91/eslint-config": "^1.1.0",
"eslint": "^7.21.0" "eslint": "^7.21.0",
"clinic": "^12.0.0"
}, },
"bin": { "bin": {
"srap-server": "./bin/server", "srap-server": "./bin/server",

View file

@ -167,7 +167,7 @@ module.exports = function(state) {
id: actualID, id: actualID,
data: update(existingData), data: update(existingData),
createdBy: parentID, createdBy: parentID,
tags: tags.map((tag) => ({ name: tag })), tags: tags.map((tag) => ({ name: tag, itemId: actualID })),
aliases: allAliases.map((alias) => ({ alias: alias })), aliases: allAliases.map((alias) => ({ alias: alias })),
updatedAt: new Date() updatedAt: new Date()
}; };

View file

@ -5,6 +5,7 @@ const { Model, QueryBuilder } = require("objection");
module.exports = function ({ db }) { module.exports = function ({ db }) {
return class Tag extends Model { return class Tag extends Model {
static tableName = "srap_tags"; static tableName = "srap_tags";
static idColumn = [ "name", "itemId" ];
static get relationMappings() { static get relationMappings() {
return { return {

View file

@ -19,79 +19,82 @@ const fetchQuery = `
LIMIT :resultLimit LIMIT :resultLimit
`; `;
const fillQuery = ` function makeFillQuery(withDependencies) {
return `
WITH WITH
dependency_tasks AS ( ${withDependencies ? `
SELECT * FROM dependencies AS (
json_to_recordset(:dependencyTaskDefinitions) AS x(task text, task_version text) SELECT * FROM json_to_recordset(:dependencyTaskDefinitions) AS x(task text, task_version text)
), ),
matching_items AS ( satisfied AS (
SELECT SELECT results.* FROM dependencies
srap_items.*,
results.updated_at AS result_date,
results.task_version,
(
results.is_successful = TRUE
AND (
results.expires_at < NOW()
OR results.is_invalidated = TRUE
)
) AS is_candidate
FROM srap_items
INNER JOIN srap_tags
ON srap_tags.item_id = srap_items.id
AND srap_tags.name = ANY(:tags)
LEFT JOIN srap_task_results AS results LEFT JOIN srap_task_results AS results
ON results.item_id = srap_items.id ON dependencies.task = results.task
AND results.task = :task AND dependencies.task_version = results.task_version
WHERE
results.is_successful = TRUE
AND results.is_invalidated = FALSE
AND results.expires_at > NOW()
), ),
candidates AS ( counts AS (
SELECT * FROM matching_items SELECT item_id, COUNT(task) AS count FROM satisfied GROUP BY item_id
WHERE result_date IS NULL ),
UNION ALL dependency_candidates AS (
SELECT * FROM matching_items SELECT item_id FROM counts WHERE count = :dependencyCount
WHERE is_candidate = TRUE ),
OR NOT (task_version = :taskVersion) ` : "" }
tag_candidates AS (
SELECT item_id FROM srap_tags WHERE name = ANY(:tags)
),
full_candidates AS MATERIALIZED (
${withDependencies
? `
SELECT tag_candidates.item_id FROM dependency_candidates
INNER JOIN tag_candidates
ON dependency_candidates.item_id = tag_candidates.item_id
`
: `
SELECT item_id FROM tag_candidates
`
}
) )
(
SELECT SELECT
:task AS task, :task AS task,
id AS item_id item_id
FROM FROM full_candidates
candidates WHERE NOT EXISTS (
SELECT item_id FROM srap_task_results AS results
WHERE WHERE
NOT EXISTS ( item_id = full_candidates.item_id
SELECT AND results.task = :task
results.* AND results.task_version = :taskVersion
FROM dependency_tasks AND results.is_successful = TRUE
LEFT JOIN srap_task_results AS results AND results.is_invalidated = FALSE
ON dependency_tasks.task = results.task AND results.expires_at > NOW()
AND dependency_tasks.task_version = results.task_version
AND results.item_id = candidates.id
WHERE
results.is_successful IS NULL
OR results.is_successful = FALSE
OR (
results.is_successful = TRUE
AND (
results.expires_at < NOW()
OR results.is_invalidated = TRUE
) )
) `;
) }
)
`; const fillQueryWithDependencies = makeFillQuery(true);
const fillQueryWithoutDependencies = makeFillQuery(false);
module.exports = function ({ metrics, backendSettings, knex }) { module.exports = function ({ metrics, backendSettings, knex }) {
return function (tx, { task }) { return function (tx, { task }) {
let hasDependencies = (task.dependencies.length > 0);
let refillParameters = { let refillParameters = {
tags: task.tags, tags: task.tags,
task: task.name, task: task.name,
taskVersion: task.version, taskVersion: task.version,
... hasDependencies
? {
dependencyCount: task.dependencies.length,
dependencyTaskDefinitions: JSON.stringify(task.dependencies.map((dependency) => { dependencyTaskDefinitions: JSON.stringify(task.dependencies.map((dependency) => {
// Case-mapping for SQL compatibility // Case-mapping for SQL compatibility
return { task_version: dependency.version, task: dependency.name }; return { task_version: dependency.version, task: dependency.name };
})) }))
}
: {}
}; };
let fetchParameters = { let fetchParameters = {
@ -104,6 +107,10 @@ module.exports = function ({ metrics, backendSettings, knex }) {
let startTime = Date.now(); let startTime = Date.now();
return Promise.try(() => { return Promise.try(() => {
let fillQuery = (hasDependencies)
? fillQueryWithDependencies
: fillQueryWithoutDependencies;
// NOTE: We are deliberately bypassing the transaction here! Also deliberately not using VALUES, since we're inserting from the results of another query instead // NOTE: We are deliberately bypassing the transaction here! Also deliberately not using VALUES, since we're inserting from the results of another query instead
return knex.raw(` return knex.raw(`
INSERT INTO srap_queue (task, item_id) INSERT INTO srap_queue (task, item_id)

4075
yarn.lock

File diff suppressed because it is too large Load diff