@ -111,8 +112,14 @@ module.exports = function (state) {
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,{
// FIXME: Stricter validation
seeds:[required,arrayOf(anything)]
// FIXME: This currently duplicates validation logic from forItem.storeItem; figure out a way to deduplicate that
seeds:[required,arrayOf({
id:[required,isString],
// Tags are required to be specified (even if an empty array) because it's easily forgotten
tags:[required,arrayOf(isString)],
aliases:[defaultTo([]),arrayOf(isString)],
data:[required,anything],// FIXME: Check for object
})]
}]
});
@ -126,23 +133,230 @@ module.exports = function (state) {
failIfExists:false
});
});
}
},
// FIXME: Other than the missing readOperation wrapper and the tx argument, this is *basically* the same logic as under forItem... this should be simplified somehow.
getItem:function(_tx,_options){
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,wrapValueAsOption("id"),{
id:[required,isString],
optional:[defaultTo(false),isBoolean]// FIXME: Can this handling be moved to the wrapper?
}]
});
returnbackend.getItem(tx,options);
},
},
forItem:function({item,task,mutationQueue}){
forItem:function(_options){
// FIXME: Proper validation rules here for the other fields as well
// We create a new instance of the actual API for every item being processed. This is necessary because some of the input arguments will default to item-specific values, and some of the logic is dependent on task-specific metadata. This is a more efficient (and understandable) approach than pretending the API is stateless and then separately wrapping the API *again* for every individual item with a whole separate layer of input validation rules.
// FIXME: Is this still correct, with the new task (graph) format?
unreachable("No mutation queue provided in live mode");
}
}
// Invokes `operation` with the module-scoped read transaction and backend; used for read-only API calls.
// NOTE(review): relies on `readTX` and `backend` being in scope in the enclosing module — confirm against the full file.
function readOperation(operation) {
	return operation(readTX, backend);
}
letexposedAPI={
// NOTE: 'exposed' API methods are the ones that are passed into a user-defined task, and which the task uses to eg. update or create new items
getItem:function(_options){
let[options]=validateArguments(arguments,{
options:[required,wrapValueAsOption("id"),{
id:[required,isString],
optional:[defaultTo(false),isBoolean]// FIXME: Can this handling be moved to the wrapper?
}]
});
returnreadOperation((tx)=>{
returnbackend.getItem(tx,options);
});
},
storeItem:function(_options){
// NOTE: Using `update` instead of `data` makes it an upsert!
let[options]=validateArguments(arguments,{
options:[required,{
id:[required,isString],
// Tags are required to be specified (even if an empty array) because it's easily forgotten
tags:[required,arrayOf(isString)],
aliases:[defaultTo([]),arrayOf(isString)],
data:[anything],// FIXME: Check for object
update:[isFunction],
failIfExists:[defaultTo(false),isBoolean],
allowUpsert:[defaultTo(true),isBoolean],
parentID:[defaultTo(item.id),isString]
},requireEither(["data","update"])]
});
let{data,...rest}=options;
returnmutableOperation((tx)=>{
returnbackend.storeItem(tx,{
...rest,
// We normalize `data` and `update` (which are mutually-exclusive) into a single option here, so that the backend only needs to deal with the `update` case
// TODO: Can this be folded into the validation rules in a reasonable and readable way?
update:(data!=null)
?(existingData)=>({...existingData,...data})
:rest.update
});
});
},
moveItem:function(_options){
let[options]=validateArguments(arguments,{
options:[required,wrapValueAsOption("into"),{
from:[defaultTo(item.id),isString],
into:[required,isString],
// NOTE: If no `merge` function is specified, that indicates that merging is not allowed (ie. this is strictly a rename), and mergeMetadata is ignored too
failIfExists:[defaultTo(false),isBoolean]// TODO: Shouldn't this default to true, for any occurrence outside of a merge/rename?
}]
});
returnmutableOperation((tx)=>{
returnbackend.createAlias(tx,options);
});
},
deleteAlias:function(_options){
let[options]=validateArguments(arguments,{
options:[required,wrapValueAsOption("from"),{
from:[required,isString]
}]
});
returnmutableOperation((tx)=>{
returnbackend.deleteAlias(tx,options);
});
},
updateData:function(_options){
// NOTE: This is a semantically self-describing convenience wrapper for `storeItem` that updates the currently-being-processed item
let[options]=validateArguments(arguments,{
options:[required,wrapValueAsOption("update"),{
id:[defaultTo(item.id),isString],
update:[required,isFunction]
}]
});
returnexposedAPI.storeItem({
...options,
tags:[]
});
},
updateMetadata:function(_options){
let[options]=validateArguments(arguments,{
options:[required,wrapValueAsOption("update"),{
id:[defaultTo(item.id),isString],
update:[required,isFunction],
task:[required,isTask]
}]
});
returnmutableOperation((tx)=>{
returnbackend.updateMetadata(tx,options);
});
},
expire:function(_options){
// TODO: It probably doesn't make any semantic sense to leave *both* arguments unspecified. Maybe that should be prohibited via eg. a non-exclusive requireEither? Otherwise the user might expect to immediately expire the *current* task, but since the task is only updated *after* the task logic runs, that is not currently possible to express.
let[options]=validateArguments(arguments,{
options:[required,{
id:[defaultTo(item.id),isString],
isTask:[defaultTo(task),isTask]
}]
});
returnmutableOperation((tx)=>{
returnbackend.expire(tx,options);
});
},
expireDependents:function(_options){
// NOTE: This method does not have a counterpart in the database backend; it's a convenience abstraction over regular `backend.expire` calls
// NOTE: 'internal' API methods are accessible to srap, but not to user-defined tasks.
@ -203,196 +417,7 @@ module.exports = function (state) {
});
},
},
exposed:{
// NOTE: 'exposed' API methods are the ones that are passed into a user-defined task, and which the task uses to eg. update or create new items
getItem:function(_tx,_id,_optional){
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,wrapValueAsOption("id"),{
id:[required,isString],
optional:[defaultTo(false),isBoolean]// FIXME: Can this handling be moved to the wrapper?
}]
});
returnbackend.getItem(tx,options);
},
storeItem:function(_tx,_options){
// NOTE: Using `update` instead of `data` makes it an upsert!
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,{
id:[required,isString],
// Tags are required to be specified (even if an empty array) because it's easily forgotten
tags:[required,arrayOf(isString)],
aliases:[defaultTo([]),arrayOf(isString)],
data:[anything],// FIXME: Check for object
update:[isFunction],
failIfExists:[defaultTo(false),isBoolean],
allowUpsert:[defaultTo(true),isBoolean],
parentID:[defaultTo(item.id),isString]
},requireEither(["data","update"])]
});
let{data,...rest}=options;
returnmutableOperation((backend)=>{
returnbackend.storeItem(tx,{
...rest,
// We normalize `data` and `update` (which are mutually-exclusive) into a single option here, so that the backend only needs to deal with the `update` case
// TODO: Can this be folded into the validation rules in a reasonable and readable way?
update:(data!=null)
?(existingData)=>({...existingData,...data})
:rest.update
});
});
},
moveItem:function(_tx,_options){
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,wrapValueAsOption("into"),{
from:[defaultTo(item.id),isString],
into:[required,isString],
// NOTE: If no `merge` function is specified, that indicates that merging is not allowed (ie. this is strictly a rename), and mergeMetadata is ignored too
failIfExists:[defaultTo(false),isBoolean]// TODO: Shouldn't this default to true, for any occurrence outside of a merge/rename?
}]
});
returnmutableOperation((backend)=>{
returnbackend.createAlias(tx,options);
});
},
deleteAlias:function(_tx,_options){
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,wrapValueAsOption("from"),{
from:[required,isString]
}]
});
returnmutableOperation((backend)=>{
returnbackend.deleteAlias(tx,options);
});
},
updateData:function(_tx,_options){
// NOTE: This is a semantically self-describing convenience wrapper for `createItem` that updates the currently-being-processed item
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,wrapValueAsOption("update"),{
id:[defaultTo(item.id),isString],
update:[required,isFunction]
}]
});
returnmutableOperation((backend)=>{
returnbackend.createItem(tx,{
...options,
tags:[]
});
});
},
updateMetadata:function(_tx,_options){
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,wrapValueAsOption("update"),{
id:[defaultTo(item.id),isString],
update:[required,isFunction],
task:[required,isTask]
}]
});
returnmutableOperation((backend)=>{
returnbackend.updateMetadata(tx,options);
});
},
expire:function(_tx,_options){
// TODO: It probably doesn't make any semantic sense to leave *both* arguments unspecified. Maybe that should be prohibited via eg. a non-exclusive requireEither? Otherwise the user might expect to immediately expire the *current* task, but since the task is only updated *after* the task logic runs, that is not currently possible to express.
let[tx,options]=validateArguments(arguments,{
tx:maybeTX,
options:[required,{
id:[defaultTo(item.id),isString],
isTask:[defaultTo(task),isTask]
}]
});
returnmutableOperation((backend)=>{
returnbackend.expire(tx,options);
});
},
expireDependents:function(_tx,_options){
// NOTE: This method does not have a counterpart in the database backend; it's a convenience abstraction over regular `backend.expire` calls
// TODO: Does it really make sense to be merging in the backendSettings here? Shouldn't that happen automatically in some way for *every* backend, rather than just the PostgreSQL one specifically? As backend settings are a generic backend feature
// FIXME: Make failIfExists actually work, currently it does nothing as the UNIQUE constraint violation cannot occur for an upsert
// TODO: Ensure that we run the transaction in full isolation mode, and retry in case of a conflict
returnPromise.try(()=>{
// NOTE: We look up by alias, since this is an upsert - and so if the specified ID already exists as an alias, we should update the existing item instead of creating a new one with the specified (aliased) ID
// NOTE: The simulated backend needs access to the 'real' backend; a task may eg. mutate an item based on its current data, and we'd need to read that from the real data source. The only constraint is that the simulated backend cannot *mutate* anything in the real backend, but reading is fine!
// TODO: Should this also lock the task? We probably want to ignore any locks, since this method is primarily used for task logic debugging purposes, and overriding locks would be desirable there.
// NOTE: We only pass in the item data itself, *not* any associated metadata like tags. If the scraping task wants access to that sort of information, it should do a `getItem` call from within its task logic where needed.
// FIXME: Is that actually still true post-refactor?
returntask.run({
data:item.data,
...api.exposed
});
}).then(()=>{
// NOTE: We only apply changes at the very end (outside of simulation mode), so that when a task implementation contains multiple operations, each of those operation always 'sees' the state at the start of the task, not the state after the previous mutation. This makes the model as a whole easier to reason about. In simulation mode, all calls are immediate and the queue is empty - after all, no mutation can happen in that case anyway. This is also another reason to ensure that operations in live mode always see the starting state; that makes its behaviour consistent with simulation mode.
// NOTE: The unlock deliberately happens outside of a transaction, so that it can always succeed, even if a task and its associated database changes failed
// NOTE: We only pass in the item data itself, *not* any associated metadata like tags. If the scraping task wants access to that sort of information, it should do a `getItem` call from within its task logic where needed.
// FIXME: Is that actually still true post-refactor?
// NOTE: The default here is for cases where a task is 'orphaned' and not associated with any tags; this can happen during development, and in that case the task won't be present in the tagsMapping at all.