Rewrite queue refill query to be much faster, fix duplicate tags

This commit is contained in:
Sven Slootweg 2022-11-26 02:08:24 +01:00
parent dbd15aa1d7
commit 59b89a3459
7 changed files with 4153 additions and 103 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
node_modules
junk
.clinic

View file

@ -0,0 +1,39 @@
"use strict";
module.exports.up = function(knex, Promise) {
// Get rid of existing duplicate entries
return knex.raw(`
DELETE FROM srap_tags
WHERE id IN (
SELECT
id
FROM (
SELECT
id,
row_number() OVER w as rnum
FROM srap_tags
WINDOW w AS (
PARTITION BY name, item_id
ORDER BY id
)
) t
WHERE t.rnum > 1);
`).then(() => {
return knex.schema
.alterTable("srap_tags", (table) => {
table.dropPrimary();
table.dropIndex("name");
table.dropColumn("id");
table.primary([ "name", "item_id" ]);
});
});
};
module.exports.down = function(knex, Promise) {
return knex.schema
.alterTable("srap_tags", (table) => {
table.dropPrimary();
table.bigIncrements("id").primary();
});
};

View file

@ -61,7 +61,8 @@
},
"devDependencies": {
"@joepie91/eslint-config": "^1.1.0",
"eslint": "^7.21.0"
"eslint": "^7.21.0",
"clinic": "^12.0.0"
},
"bin": {
"srap-server": "./bin/server",

View file

@ -167,7 +167,7 @@ module.exports = function(state) {
id: actualID,
data: update(existingData),
createdBy: parentID,
tags: tags.map((tag) => ({ name: tag })),
tags: tags.map((tag) => ({ name: tag, itemId: actualID })),
aliases: allAliases.map((alias) => ({ alias: alias })),
updatedAt: new Date()
};

View file

@ -5,6 +5,7 @@ const { Model, QueryBuilder } = require("objection");
module.exports = function ({ db }) {
return class Tag extends Model {
static tableName = "srap_tags";
static idColumn = [ "name", "itemId" ];
static get relationMappings() {
return {

View file

@ -19,79 +19,82 @@ const fetchQuery = `
LIMIT :resultLimit
`;
const fillQuery = `
WITH
dependency_tasks AS (
SELECT * FROM
json_to_recordset(:dependencyTaskDefinitions) AS x(task text, task_version text)
),
matching_items AS (
SELECT
srap_items.*,
results.updated_at AS result_date,
results.task_version,
(
results.is_successful = TRUE
AND (
results.expires_at < NOW()
OR results.is_invalidated = TRUE
)
) AS is_candidate
FROM srap_items
INNER JOIN srap_tags
ON srap_tags.item_id = srap_items.id
AND srap_tags.name = ANY(:tags)
LEFT JOIN srap_task_results AS results
ON results.item_id = srap_items.id
AND results.task = :task
),
candidates AS (
SELECT * FROM matching_items
WHERE result_date IS NULL
UNION ALL
SELECT * FROM matching_items
WHERE is_candidate = TRUE
OR NOT (task_version = :taskVersion)
)
(
function makeFillQuery(withDependencies) {
return `
WITH
${withDependencies ? `
dependencies AS (
SELECT * FROM json_to_recordset(:dependencyTaskDefinitions) AS x(task text, task_version text)
),
satisfied AS (
SELECT results.* FROM dependencies
LEFT JOIN srap_task_results AS results
ON dependencies.task = results.task
AND dependencies.task_version = results.task_version
WHERE
results.is_successful = TRUE
AND results.is_invalidated = FALSE
AND results.expires_at > NOW()
),
counts AS (
SELECT item_id, COUNT(task) AS count FROM satisfied GROUP BY item_id
),
dependency_candidates AS (
SELECT item_id FROM counts WHERE count = :dependencyCount
),
` : "" }
tag_candidates AS (
SELECT item_id FROM srap_tags WHERE name = ANY(:tags)
),
full_candidates AS MATERIALIZED (
${withDependencies
? `
SELECT tag_candidates.item_id FROM dependency_candidates
INNER JOIN tag_candidates
ON dependency_candidates.item_id = tag_candidates.item_id
`
: `
SELECT item_id FROM tag_candidates
`
}
)
SELECT
:task AS task,
id AS item_id
FROM
candidates
WHERE
NOT EXISTS (
SELECT
results.*
FROM dependency_tasks
LEFT JOIN srap_task_results AS results
ON dependency_tasks.task = results.task
AND dependency_tasks.task_version = results.task_version
AND results.item_id = candidates.id
WHERE
results.is_successful IS NULL
OR results.is_successful = FALSE
OR (
results.is_successful = TRUE
AND (
results.expires_at < NOW()
OR results.is_invalidated = TRUE
)
)
)
)
`;
item_id
FROM full_candidates
WHERE NOT EXISTS (
SELECT item_id FROM srap_task_results AS results
WHERE
item_id = full_candidates.item_id
AND results.task = :task
AND results.task_version = :taskVersion
AND results.is_successful = TRUE
AND results.is_invalidated = FALSE
AND results.expires_at > NOW()
)
`;
}
const fillQueryWithDependencies = makeFillQuery(true);
const fillQueryWithoutDependencies = makeFillQuery(false);
module.exports = function ({ metrics, backendSettings, knex }) {
return function (tx, { task }) {
let hasDependencies = (task.dependencies.length > 0);
let refillParameters = {
tags: task.tags,
task: task.name,
taskVersion: task.version,
dependencyTaskDefinitions: JSON.stringify(task.dependencies.map((dependency) => {
// Case-mapping for SQL compatibility
return { task_version: dependency.version, task: dependency.name };
}))
... hasDependencies
? {
dependencyCount: task.dependencies.length,
dependencyTaskDefinitions: JSON.stringify(task.dependencies.map((dependency) => {
// Case-mapping for SQL compatibility
return { task_version: dependency.version, task: dependency.name };
}))
}
: {}
};
let fetchParameters = {
@ -104,6 +107,10 @@ module.exports = function ({ metrics, backendSettings, knex }) {
let startTime = Date.now();
return Promise.try(() => {
let fillQuery = (hasDependencies)
? fillQueryWithDependencies
: fillQueryWithoutDependencies;
// NOTE: We are deliberately bypassing the transaction here! Also deliberately not using VALUES, since we're inserting from the results of another query instead
return knex.raw(`
INSERT INTO srap_queue (task, item_id)

4075
yarn.lock

File diff suppressed because it is too large Load diff