Table rename, Knex update, WIP sync, misc WIP

This commit is contained in:
Sven Slootweg 2021-12-09 10:12:11 +01:00
parent ef8e6bf5b6
commit 90ef557a30
10 changed files with 902 additions and 76 deletions

View file

@ -1,6 +1,6 @@
{ {
"database": { "database": {
"socketPath": "/run/postgresql", "socketPath": "/run/postgresql",
"database": "scrapingserver" "database": "seekseek_documentation"
} }
} }

View file

@ -10,5 +10,6 @@ module.exports = {
}, },
migrations: { migrations: {
tableName: "srap_knex_migrations" tableName: "srap_knex_migrations"
} },
pool: { min: 0, max: 2 }
}; };

View file

@ -14,17 +14,17 @@ module.exports.up = function(knex, Promise) {
}) })
.createTable("srap_aliases", (table) => { .createTable("srap_aliases", (table) => {
table.text("alias").notNullable().primary(); table.text("alias").notNullable().primary();
table.text("item_id").references("items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE"); table.text("item_id").references("srap_items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
}) })
.createTable("srap_tags", (table) => { .createTable("srap_tags", (table) => {
table.bigIncrements("id").primary(); table.bigIncrements("id").primary();
table.text("item_id").references("items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE"); table.text("item_id").references("srap_items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.text("name").notNullable().index(); table.text("name").notNullable().index();
}) })
.createTable("srap_task_results", (table) => { .createTable("srap_task_results", (table) => {
table.primary([ "task", "item_id" ]); table.primary([ "task", "item_id" ]);
table.text("task").notNullable(); table.text("task").notNullable();
table.text("item_id").references("items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE"); table.text("item_id").references("srap_items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.text("task_version").notNullable(); table.text("task_version").notNullable();
table.jsonb("metadata").notNullable(); table.jsonb("metadata").notNullable();
table.boolean("is_successful").notNullable(); table.boolean("is_successful").notNullable();
@ -35,7 +35,7 @@ module.exports.up = function(knex, Promise) {
.createTable("srap_tasks_in_progress", (table) => { .createTable("srap_tasks_in_progress", (table) => {
table.primary([ "task", "item_id" ]); table.primary([ "task", "item_id" ]);
table.text("task").notNullable(); table.text("task").notNullable();
table.text("item_id").references("items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE"); table.text("item_id").references("srap_items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.timestamp("started_at").notNullable().defaultTo(knex.fn.now()); table.timestamp("started_at").notNullable().defaultTo(knex.fn.now());
}) })
.createTable("srap_failures", (table) => { .createTable("srap_failures", (table) => {
@ -46,7 +46,7 @@ module.exports.up = function(knex, Promise) {
table.foreign([ "task", "item_id" ]) table.foreign([ "task", "item_id" ])
.references([ "task", "item_id" ]) .references([ "task", "item_id" ])
.inTable("task_results"); .inTable("srap_task_results");
}); });
} }

View file

@ -36,12 +36,12 @@
"bluebird": "^3.7.2", "bluebird": "^3.7.2",
"chalk": "^4.1.0", "chalk": "^4.1.0",
"create-error": "^0.3.1", "create-error": "^0.3.1",
"date-fns": "^2.18.0", "date-fns": "^2.25.0",
"debug": "^4.3.1", "debug": "^4.3.1",
"default-value": "^1.0.0", "default-value": "^1.0.0",
"express": "^4.17.1", "express": "^4.17.1",
"express-promise-router": "^4.0.1", "express-promise-router": "^4.0.1",
"knex": "^0.21.17", "knex": "^0.95.11",
"map-obj": "^4.2.0", "map-obj": "^4.2.0",
"ms": "^2.1.3", "ms": "^2.1.3",
"objection": "^2.2.14", "objection": "^2.2.14",

View file

@ -88,10 +88,12 @@ return Promise.try(() => {
}); });
router.get("/updates", (req, res) => { router.get("/updates", (req, res) => {
let timestamp = (req.query.timestamp != null) let timestamp = (req.query.since != null)
? new Date(parseInt(req.query.since) * 1000) ? new Date(parseInt(req.query.since))
: undefined; : undefined;
console.log({ prefix: req.query.prefix, timestamp: timestamp });
// FIXME: Validation! // FIXME: Validation!
return pipe([ return pipe([
queries.getUpdates(knex, { prefix: req.query.prefix, timestamp: timestamp }), queries.getUpdates(knex, { prefix: req.query.prefix, timestamp: timestamp }),

View file

@ -149,7 +149,7 @@ module.exports = function createKernel(configuration) {
return queue.execute(); return queue.execute();
}); });
}); });
}); }, { doNotRejectOnRollback: false });
} }
function simulateTask(id, task) { function simulateTask(id, task) {

View file

@ -1,7 +1,8 @@
"use strict"; "use strict";
const Promise = require("bluebird"); const Promise = require("bluebird");
const { UniqueViolationError } = require("objection"); // const { UniqueViolationError } = require("objection");
const dateFns = require("date-fns");
const { validateArguments } = require("@validatem/core"); const { validateArguments } = require("@validatem/core");
const required = require("@validatem/required"); const required = require("@validatem/required");
@ -45,7 +46,13 @@ function taskResultsToObject(taskResults) {
module.exports = function ({ db, knex }) { module.exports = function ({ db, knex }) {
return { return {
// FIXME: Make object API instead // FIXME: Make object API instead
getItem: function (tx, id, optional = false) { getItem: function (_tx, _id, _optional) {
let [ tx, id, optional ] = validateArguments(arguments, {
tx: [ required, isTX ],
id: [ required, isString ],
optional: [ defaultTo(false), isBoolean ]
});
return Promise.try(() => { return Promise.try(() => {
return db.Alias.relatedQuery("item", tx) return db.Alias.relatedQuery("item", tx)
.for(id) .for(id)
@ -120,7 +127,7 @@ module.exports = function ({ db, knex }) {
insertMissing: true insertMissing: true
}); });
} }
}).catch({ name: "UniqueViolationError", table: "items" }, (error) => { }).catch({ name: "UniqueViolationError", table: "srap_items" }, (error) => {
if (failIfExists) { if (failIfExists) {
throw error; throw error;
} else { } else {
@ -281,16 +288,20 @@ module.exports = function ({ db, knex }) {
}] }]
}); });
let promise = db.Alias.query(tx).insert({ // Isolate this operation into a savepoint so that it can fail without breaking the entire transaction
alias: from, let promise = tx.transaction((tx) => {
itemId: to, return db.Alias.query(tx).insert({
updatedAt: new Date() alias: from,
itemId: to,
updatedAt: new Date()
});
}); });
if (failIfExists) { if (failIfExists) {
return promise; return promise;
} else { } else {
return promise.catch(UniqueViolationError, noop); return Promise.resolve(promise)
.catch({ name: "UniqueViolationError" }, noop);
} }
}, },
deleteAlias: function (_tx, _options) { deleteAlias: function (_tx, _options) {
@ -411,9 +422,16 @@ module.exports = function ({ db, knex }) {
}] }]
}); });
// NOTE: This is a hacky workaround - if we don't do this, then for some reason entries *at* the exact timestamp are also included, which is not what we want.
// FIXME: Verify that this doesn't break anything, e.g. when an entry is created in between the original timestamp and +1ms.
let actualTimestamp = (timestamp != null)
? dateFns.addMilliseconds(timestamp, 1)
: undefined;
function applyWhereClauses(query, idField) { function applyWhereClauses(query, idField) {
if (timestamp != null) { if (timestamp != null) {
query = query.whereRaw(`updated_at > ?`, [ timestamp ]); // FIXME: An error in the query here throws an error, resulting in an abort handling bug in a promistream
query = query.whereRaw(`updated_at > ?`, [ actualTimestamp ]);
} }
if (prefix != null) { if (prefix != null) {

View file

@ -1,7 +1,7 @@
"use strict"; "use strict";
const Promise = require("bluebird"); const Promise = require("bluebird");
const { UniqueViolationError } = require("objection"); // const { UniqueViolationError } = require("objection");
const pipe = require("@promistream/pipe"); const pipe = require("@promistream/pipe");
const map = require("@promistream/map"); const map = require("@promistream/map");
@ -17,7 +17,7 @@ module.exports = function ({ db, knex }) {
}); });
}).then(() => { }).then(() => {
return item; return item;
}).catch(UniqueViolationError, () => { }).catch({ name: "UniqueViolationError" }, () => {
return mapFilter.NoValue; return mapFilter.NoValue;
}); });
}); });
@ -26,7 +26,7 @@ module.exports = function ({ db, knex }) {
return Promise.try(() => { return Promise.try(() => {
return knex.transaction((tx) => { return knex.transaction((tx) => {
return processHandler(item, tx); return processHandler(item, tx);
}); }, { doNotRejectOnRollback: false });
}).finally(() => { }).finally(() => {
return db.TaskInProgress.query(knex) return db.TaskInProgress.query(knex)
.delete() .delete()

View file

@ -13,7 +13,7 @@ const rateLimit = require("@promistream/rate-limit");
const parallelize = require("@promistream/parallelize"); const parallelize = require("@promistream/parallelize");
const logStatus = require("./log-status"); const logStatus = require("./log-status");
const { UniqueViolationError } = require("objection"); // const { UniqueViolationError } = require("objection");
// FIXME: Revert inlining of task_states once switched to PostgreSQL 12+, which can do this automatically using NOT MATERIALIZED // FIXME: Revert inlining of task_states once switched to PostgreSQL 12+, which can do this automatically using NOT MATERIALIZED
// FIXME: Check whether the dependency task_versions are actually being correctly passed in, and aren't accidentally nulls // FIXME: Check whether the dependency task_versions are actually being correctly passed in, and aren't accidentally nulls
@ -27,25 +27,25 @@ let query = `
SELECT SELECT
DISTINCT ON (srap_items.id) DISTINCT ON (srap_items.id)
srap_items.*, srap_items.*,
srap_results.updated_at AS result_date, results.updated_at AS result_date,
srap_results.task_version, results.task_version,
( (
srap_results.is_successful = TRUE results.is_successful = TRUE
AND ( AND (
srap_results.expires_at < NOW() results.expires_at < NOW()
OR srap_results.is_invalidated = TRUE OR results.is_invalidated = TRUE
) )
) AS is_candidate ) AS is_candidate
FROM items FROM srap_items
INNER JOIN srap_tags INNER JOIN srap_tags
ON srap_tags.item_id = srap_items.id ON srap_tags.item_id = srap_items.id
AND srap_tags.name = ANY(:tags) AND srap_tags.name = ANY(:tags)
LEFT JOIN srap_task_results AS results LEFT JOIN srap_task_results AS results
ON results.item_id = items.id ON results.item_id = srap_items.id
AND results.task = :task AND results.task = :task
WHERE WHERE
NOT EXISTS ( NOT EXISTS (
SELECT FROM srap_tasks_in_progress AS pr WHERE pr.item_id = items.id SELECT FROM srap_tasks_in_progress AS pr WHERE pr.item_id = srap_items.id
) )
), ),
candidates AS ( candidates AS (
@ -178,7 +178,7 @@ module.exports = function (state) {
metadata: {}, metadata: {},
... commonUpdate ... commonUpdate
}); });
}).catch(UniqueViolationError, () => { }).catch({ name: "UniqueViolationError" }, () => {
return db.TaskResult.query(knex).findById([ task, item.id ]).patch({ return db.TaskResult.query(knex).findById([ task, item.id ]).patch({
... commonUpdate ... commonUpdate
}); });

889
yarn.lock

File diff suppressed because it is too large Load diff