Table rename, Knex update, WIP sync, misc WIP

backend-refactor
Sven Slootweg 2 years ago
parent ef8e6bf5b6
commit 90ef557a30

@ -1,6 +1,6 @@
{
"database": {
"socketPath": "/run/postgresql",
"database": "scrapingserver"
"database": "seekseek_documentation"
}
}

@ -10,5 +10,6 @@ module.exports = {
},
migrations: {
tableName: "srap_knex_migrations"
}
},
pool: { min: 0, max: 2 }
};

@ -14,17 +14,17 @@ module.exports.up = function(knex, Promise) {
})
.createTable("srap_aliases", (table) => {
table.text("alias").notNullable().primary();
table.text("item_id").references("items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.text("item_id").references("srap_items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
})
.createTable("srap_tags", (table) => {
table.bigIncrements("id").primary();
table.text("item_id").references("items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.text("item_id").references("srap_items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.text("name").notNullable().index();
})
.createTable("srap_task_results", (table) => {
table.primary([ "task", "item_id" ]);
table.text("task").notNullable();
table.text("item_id").references("items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.text("item_id").references("srap_items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.text("task_version").notNullable();
table.jsonb("metadata").notNullable();
table.boolean("is_successful").notNullable();
@ -35,7 +35,7 @@ module.exports.up = function(knex, Promise) {
.createTable("srap_tasks_in_progress", (table) => {
table.primary([ "task", "item_id" ]);
table.text("task").notNullable();
table.text("item_id").references("items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.text("item_id").references("srap_items.id").notNullable().onUpdate("CASCADE").onDelete("CASCADE");
table.timestamp("started_at").notNullable().defaultTo(knex.fn.now());
})
.createTable("srap_failures", (table) => {
@ -46,7 +46,7 @@ module.exports.up = function(knex, Promise) {
table.foreign([ "task", "item_id" ])
.references([ "task", "item_id" ])
.inTable("task_results");
.inTable("srap_task_results");
});
}

@ -36,12 +36,12 @@
"bluebird": "^3.7.2",
"chalk": "^4.1.0",
"create-error": "^0.3.1",
"date-fns": "^2.18.0",
"date-fns": "^2.25.0",
"debug": "^4.3.1",
"default-value": "^1.0.0",
"express": "^4.17.1",
"express-promise-router": "^4.0.1",
"knex": "^0.95.11",
"knex": "^0.21.17",
"map-obj": "^4.2.0",
"ms": "^2.1.3",
"objection": "^2.2.14",

@ -88,10 +88,12 @@ return Promise.try(() => {
});
router.get("/updates", (req, res) => {
let timestamp = (req.query.timestamp != null)
? new Date(parseInt(req.query.since) * 1000)
let timestamp = (req.query.since != null)
? new Date(parseInt(req.query.since))
: undefined;
console.log({ prefix: req.query.prefix, timestamp: timestamp });
// FIXME: Validation!
return pipe([
queries.getUpdates(knex, { prefix: req.query.prefix, timestamp: timestamp }),

@ -149,7 +149,7 @@ module.exports = function createKernel(configuration) {
return queue.execute();
});
});
});
}, { doNotRejectOnRollback: false });
}
function simulateTask(id, task) {

@ -1,7 +1,8 @@
"use strict";
const Promise = require("bluebird");
const { UniqueViolationError } = require("objection");
// const { UniqueViolationError } = require("objection");
const dateFns = require("date-fns");
const { validateArguments } = require("@validatem/core");
const required = require("@validatem/required");
@ -45,7 +46,13 @@ function taskResultsToObject(taskResults) {
module.exports = function ({ db, knex }) {
return {
// FIXME: Make object API instead
getItem: function (tx, id, optional = false) {
getItem: function (_tx, _id, _optional) {
let [ tx, id, optional ] = validateArguments(arguments, {
tx: [ required, isTX ],
id: [ required, isString ],
optional: [ defaultTo(false), isBoolean ]
});
return Promise.try(() => {
return db.Alias.relatedQuery("item", tx)
.for(id)
@ -120,7 +127,7 @@ module.exports = function ({ db, knex }) {
insertMissing: true
});
}
}).catch({ name: "UniqueViolationError", table: "items" }, (error) => {
}).catch({ name: "UniqueViolationError", table: "srap_items" }, (error) => {
if (failIfExists) {
throw error;
} else {
@ -281,16 +288,20 @@ module.exports = function ({ db, knex }) {
}]
});
let promise = db.Alias.query(tx).insert({
alias: from,
itemId: to,
updatedAt: new Date()
// Isolate this operation into a savepoint so that it can fail without breaking the entire transaction
let promise = tx.transaction((tx) => {
return db.Alias.query(tx).insert({
alias: from,
itemId: to,
updatedAt: new Date()
});
});
if (failIfExists) {
return promise;
} else {
return promise.catch(UniqueViolationError, noop);
return Promise.resolve(promise)
.catch({ name: "UniqueViolationError" }, noop);
}
},
deleteAlias: function (_tx, _options) {
@ -411,9 +422,16 @@ module.exports = function ({ db, knex }) {
}]
});
// NOTE: This is a hacky workaround - if we don't do this, then for some reason entries *at* the exact timestamp are also included, which is not what we want.
// FIXME: Verify that this doesn't break anything, e.g. when an entry is created in between the original timestamp and +1ms.
let actualTimestamp = (timestamp != null)
? dateFns.addMilliseconds(timestamp, 1)
: undefined;
function applyWhereClauses(query, idField) {
if (timestamp != null) {
query = query.whereRaw(`updated_at > ?`, [ timestamp ]);
// FIXME: A faulty query here throws an error, which results in an abort-handling bug in a promistream
query = query.whereRaw(`updated_at > ?`, [ actualTimestamp ]);
}
if (prefix != null) {

@ -1,7 +1,7 @@
"use strict";
const Promise = require("bluebird");
const { UniqueViolationError } = require("objection");
// const { UniqueViolationError } = require("objection");
const pipe = require("@promistream/pipe");
const map = require("@promistream/map");
@ -17,7 +17,7 @@ module.exports = function ({ db, knex }) {
});
}).then(() => {
return item;
}).catch(UniqueViolationError, () => {
}).catch({ name: "UniqueViolationError" }, () => {
return mapFilter.NoValue;
});
});
@ -26,7 +26,7 @@ module.exports = function ({ db, knex }) {
return Promise.try(() => {
return knex.transaction((tx) => {
return processHandler(item, tx);
});
}, { doNotRejectOnRollback: false });
}).finally(() => {
return db.TaskInProgress.query(knex)
.delete()

@ -13,7 +13,7 @@ const rateLimit = require("@promistream/rate-limit");
const parallelize = require("@promistream/parallelize");
const logStatus = require("./log-status");
const { UniqueViolationError } = require("objection");
// const { UniqueViolationError } = require("objection");
// FIXME: Revert inlining of task_states once switched to PostgreSQL 12+, which can do this automatically using NOT MATERIALIZED
// FIXME: Check whether the dependency task_versions are actually being passed in correctly, and aren't accidentally null
@ -27,25 +27,25 @@ let query = `
SELECT
DISTINCT ON (srap_items.id)
srap_items.*,
srap_results.updated_at AS result_date,
srap_results.task_version,
results.updated_at AS result_date,
results.task_version,
(
srap_results.is_successful = TRUE
results.is_successful = TRUE
AND (
srap_results.expires_at < NOW()
OR srap_results.is_invalidated = TRUE
results.expires_at < NOW()
OR results.is_invalidated = TRUE
)
) AS is_candidate
FROM items
FROM srap_items
INNER JOIN srap_tags
ON srap_tags.item_id = srap_items.id
AND srap_tags.name = ANY(:tags)
LEFT JOIN srap_task_results AS results
ON results.item_id = items.id
ON results.item_id = srap_items.id
AND results.task = :task
WHERE
NOT EXISTS (
SELECT FROM srap_tasks_in_progress AS pr WHERE pr.item_id = items.id
SELECT FROM srap_tasks_in_progress AS pr WHERE pr.item_id = srap_items.id
)
),
candidates AS (
@ -178,7 +178,7 @@ module.exports = function (state) {
metadata: {},
... commonUpdate
});
}).catch(UniqueViolationError, () => {
}).catch({ name: "UniqueViolationError" }, () => {
return db.TaskResult.query(knex).findById([ task, item.id ]).patch({
... commonUpdate
});

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save