From d30350fae322fce94d86802a27f658cba3089a86 Mon Sep 17 00:00:00 2001 From: "Johannes J. Schmidt" Date: Wed, 21 May 2014 11:37:43 +0200 Subject: [PATCH] Rewrite using couch-daemon --- .jshintrc | 14 -- README.md | 156 ++++++++++++----- index.js | 106 ++---------- lib/couchmagick-stream.js | 340 ++++++++++++++++++++++++++++++++++++++ package.json | 17 +- 5 files changed, 473 insertions(+), 160 deletions(-) delete mode 100644 .jshintrc create mode 100644 lib/couchmagick-stream.js diff --git a/.jshintrc b/.jshintrc deleted file mode 100644 index 8c86fc7..0000000 --- a/.jshintrc +++ /dev/null @@ -1,14 +0,0 @@ -{ - "curly": true, - "eqeqeq": true, - "immed": true, - "latedef": true, - "newcap": true, - "noarg": true, - "sub": true, - "undef": true, - "unused": true, - "boss": true, - "eqnull": true, - "node": true -} diff --git a/README.md b/README.md index c29c944..c3212a4 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,36 @@ -couchmagick -=========== +# couchmagick Run ImageMagicks `convert` on CouchDB documents. couchmagick runs as an [os_daemon](http://docs.couchdb.org/en/1.5.x/config/externals.html#os_daemons), which means that CouchDB manages the process and you can configure it using CouchDBs configuration mechanism, which is both a huge win. -The versions and commandline arguments are defined in design documents under a `couchmagick` section. +Versions and commandline arguments which are passed to `convert` are defined in design documents under a `couchmagick` section. -couchmagicks Stream based implementation provides low memory footprint. +couchmagicks stream based implementation provides low memory footprint. -Installation ------------- +## Installation The installation of couchmagick is dead simple. 
-Make sure you have `ImageMagick` installed, eg on Debian: -```bash -apt-get install imagemagick +Make sure you have `ImageMagick` installed, eg on Debian: +`apt-get install imagemagick` + +Install couchmagick via npm: +`npm install couchmagick -g` + +## Commandline Client +You can run couchmagick from the commandline: +```shell +couchmagick ``` -Install couchmagick via npm: +The options explained below can be given as commandline parameters (prefixed with +`--`) or environment variables (UPPERCASED). -```bash -npm install couchmagick -g +```shell +couchmagick --username bernd --password secure --whitelist projects --concurrency 8 --timeout 1000 ``` -Daemon Configuration --------------------- +## Daemon Configuration Add couchmagick to `os_daemons` config section: ```ini @@ -37,16 +42,10 @@ Now CouchDB takes care of the couchmagick process. ```ini [couchmagick] -; Optional username and password, used by the workers to access the database -username = mein-user +; Optional username and password, used by the workers to access the database. +; Default is null. +username = bernd password = secure -; Number of simultaneous changes feeds in parallel. Default is 20. -; Increase it to at least the number of your databases, to get best performance. -; If streams is less than the number of databases, all databases will still be queried -; but in intervals of the changes_feed_timeout (see below). You should keep the -; streams parameter equal to or larger than the number of databases in the server -; usually. -streams = 20 ; Concurrency level (number of simultanous convert processes per stream). Default is 1. ; this should be set to the number of cores of your cpu for optimum performance, but ; it really depends on the number of databases and their usage patterns. @@ -54,27 +53,21 @@ concurrency = 1 ; Timeout for a convert process in ms. Default is 60000 (1min).
This should be plenty ; for the usual image resizes, increase it if you deal with really large images and complex ; imagemagick processing. -convert_process_timeout = 60000 -; Timeout for changes feed in ms. Default is 60000. See the 'streams' parameter above -; if you have a really large number of databases in your server and cannot afford to -; have a changes feed open to each of them. -changes_feed_timeout = 60000 -; Batch size. This limits the batches the workers will take from the changes feed. -; It basically translates to a limit parameter on the changes feed. Default is 0. -; IMPORTANT NOTE: currently you should leave it at 0 unless other feeds than -; `continuous` are supported by couchmagick, because the feed does not stop -; after `limit` changes has been arrived. -; See https://github.com/jo/couchmagick/issues/7 -limit = 0 +timeout = 60000 +; Only documents in the databases below are processed (separate with comma) +; whitelist = mydb,otherdb +; Ignore the following databases (again comma separated list) +blacklist = _users,_replicator ``` -Imagemagick Configuration -------------------------- +## Imagemagick Configuration Add a `couchmagick` property to a design document. couchmagick will process all databases which have such a design document. + +### Minimal Example ```json { - "_id": "_design/my-couchmagick-config", + "_id": "_design/minimal-couchmagick-config", "_rev": "1-a653b27246b01cf9204fa9f5dee7cc64", "couchmagick": { "versions": { @@ -88,15 +81,88 @@ databases which have such a design document. } ``` -See [couchmagick-stream](https://github.com/null2/couchmagick-stream) for available options. +### `filter` +There are two kinds of filters which you can define: one operates on doc level +and one on version level. + +#### Document Filter +This filter is called with one argument: document. + +#### Version Filter +This filter is called with two arguments, document and attachment name. + +### `content_type` +Content-Type of the resulting attachment.
Default is `image/jpeg`. + +### `id` +The document id where the version is stored. Defaults to `{id}/{version}`. + +Can have the following placeholders: +* `id` - the original doc id +* `parts` - array of the id split at `/` +* `version` - name of the version + +### `name` +The attachment name of the version. Default is `{basename}-{version}{extname}`. + +Can have placeholders: +* `id` - the original doc id +* `parts` - array of the id split at `/` +* `version` - name of the version +* `name` - original attachment name, eg `this/is/my-image.jpg` +* `extname` - file extension of the original attachment name, eg `.jpg` +* `basename` - basename without extension, eg `my-image` +* `dirname` - directory name, eg `this/is` +* `version` - name of the version + +### `args` +Array of argument strings for ImageMagicks `convert`. +The default is `['-', 'jpg:-']`, which means that ImageMagick converts the image +to `jpg`. You can see that we use `convert` with pipes for in- and output. -Contributing ------------- -Lint your code via `npm run jshint`. +See [ImageMagick Convert Command-line Tool](http://www.imagemagick.org/script/convert.php) +for a comprehensive list of options. -License -------- -Copyright (c) 2012-2013 Johannes J.
Schmidt, null2 GmbH +### Advanced Example +```json +{ + "_id": "_design/advanced-couchmagick-config", + "_rev": "1-0b42e71d7b179c7e44a436704e4fd8e3", + "couchmagick": { + "filter": "function(doc) { return doc.type === 'post'; }", + "versions": { + "medium": { + "id": "{id}-{version}", + "name": "{basename}/{version}{extname}", + "args": [ + "-resize", "800x600", + "-quality", "75", + "-colorspace", "sRGB", + "-strip" + ] + }, + "large": { + "filter": "function(doc, name) { return name.match(/^header/); }", + "id": "{id}-header", + "name": "header/large.jpg", + "args": [ + "-quality", "75", + "-unsharp", "0", + "-colorspace", "sRGB", + "-interlace", "Plane", + "-strip", + "-density", "72", + "-resize", "960x320^", + "-gravity", "center", + "-crop", "960x320+0+0", "+repage" + ] + } + } + } +} +``` +## License +Copyright (c) 2012-2014 Johannes J. Schmidt, null2 GmbH Licensed under the MIT license. diff --git a/index.js b/index.js index e6b81d1..98edf33 100755 --- a/index.js +++ b/index.js @@ -1,103 +1,25 @@ #!/usr/bin/env node -/* couchmagick - * (c) 2013 Johannes J. Schmidt, null2 GmbH, Berlin +/* couchmagick: Run ImageMagicks `convert` on CouchDB documents. + * + * (c) 2014 Johannes J. 
Schmidt, null2 GmbH, Berlin */ var pkg = require('./package.json'); +var couchmagickstream = require('./lib/couchmagick-stream'); -var url = require('url'); -var async = require('async'); -var nano = require('nano'); -var magick = require('couchmagick-listen'); var daemon = require('couch-daemon'); +var _ = require('highland'); -var couchmagick = daemon(process.stdin, process.stdout, function() { - process.exit(0); -}); - -couchmagick.get({ - // Connection - address: 'httpd.bind_address', - port: 'httpd.port', - auth: { - username: pkg.name + '.username', - password: pkg.name + '.password' - }, - - // Batching - concurrency: pkg.name + '.concurrency', - streams: pkg.name + '.streams', - limit: pkg.name + '.limit', - - changes_feed_timeout: pkg.name + '.changes_feed_timeout', - convert_process_timeout: pkg.name + '.convert_process_timeout' -}, function(err, config) { - if (err) { - return process.exit(0); - } - - // defaults - config.concurrency = config.concurrency && parseInt(config.concurrency, 10) || 1; - config.streams = config.streams && parseInt(config.streams, 10) || 1; - config.limit = config.limit && parseInt(config.limit, 100) || 0; - config.changes_feed_timeout = config.changes_feed_timeout && parseInt(config.changes_feed_timeout, 10) || 10000; - config.convert_process_timeout = config.convert_process_timeout && parseInt(config.convert_process_timeout, 10) || 60000; - couchmagick.info('using config ' + JSON.stringify(config).replace(/"password":".*?"/, '"password":"***"')); +daemon({ + name: pkg.name, + version: pkg.version, + include_docs: true +}, function(url, options) { + var magick = _(couchmagickstream(url, options)); - - // TODO: validate config - - - var couch = url.format({ - protocol: 'http', - hostname: config.address, - port: config.port, - auth: config.auth && config.auth.username && config.auth.password ? 
[ config.auth.username, config.auth.password ].join(':') : null - }); - - var options = { - limit: config.limit, - feed: 'continuous', - changes_feed_timeout: config.changes_feed_timeout, - convert_process_timeout: config.convert_process_timeout, - concurrency: config.concurrency + return function(source) { + return source.pipe(magick); }; - - - function listen(db, next) { - couchmagick.info('Listening on ' + db); - - var stream = magick(url.resolve(couch, db), options); - - stream.on('error', couchmagick.error); - stream.on('data', function(data) { - if (data.response) { - couchmagick.info(data.response); - } - }); - stream.on('end', next); - } - - - // main loop ;) - function run(err) { - if (err) { - process.exit(0); - } - - - // get list of all databases - // TODO: listen to db changes - nano(couch).db.list(function(err, dbs) { - if (err) { - couchmagick.error('Can not get _all_dbs: ' + err.description); - - return process.exit(0); - } - - async.eachLimit(dbs, config.streams, listen, run); - }); - } - run(); }); + diff --git a/lib/couchmagick-stream.js b/lib/couchmagick-stream.js new file mode 100644 index 0000000..c70bfa4 --- /dev/null +++ b/lib/couchmagick-stream.js @@ -0,0 +1,340 @@ +/* couchmagick: Run ImageMagicks `convert` on CouchDB documents. + * + * (c) 2014 Johannes J. Schmidt, null2 GmbH, Berlin + */ + +var path = require('path'); +var spawn = require('child_process').spawn; +var _ = require('highland'); +var async = require('async'); +var nano = require('nano'); +var strformat = require('strformat'); + + +// TODO: emit all documents, also filtered one, to enable checkpointing +module.exports = function couchmagick(url, options) { + options = options || {}; + options.concurrency = options.concurrency || 1; + options.timeout = options.timeout || 60 * 1000; // 1 minute + + + var couch = nano(url); + var configs = {}; + + + // convert attachment + // TODO: process + // * configs + // * attachments + // * versions + // in one go. 
Don't do the through split pipeline stuff. + function convertAttachment(data, callback) { + var db = couch.use(data.db_name); + + // get target doc + db.get(data.target.id, function(err, doc) { + data.target.doc = doc || { _id: data.target.id }; + data.target.doc.couchmagick = data.target.doc.couchmagick || {}; + data.target.doc.couchmagick[data.target.id] = data.target.doc.couchmagick[data.target.id] || {}; + + + // do not process attachments twice, respect revpos + if (data.target.doc.couchmagick[data.target.id][data.target.name] && data.target.doc.couchmagick[data.target.id][data.target.name].revpos === data.source.revpos) { + return callback(null, data); + } + + + // insert couchmagick stamp + data.target.doc.couchmagick[data.target.id][data.target.name] = { + id: data.source.id, + name: data.source.name, + revpos: data.source.revpos + }; + + + // query params, doc_name is used by nano as id + var params = { + doc_name: data.target.id + }; + if (data.target.doc._rev) { + params.rev = data.target.doc._rev; + } + + // attachment multipart part + var attachment = { + name: data.target.name, + content_type: data.target.content_type, + data: [] + }; + + // convert process + var c = spawn('convert', data.args); + + // collect errors + var cerror = []; + c.stderr.on('data', function(err) { + cerror.push(err); + }); + + // convert timeout + var kill = setTimeout(function() { + cerror.push(new Buffer('timeout')); + // send SIGTERM + c.kill(); + }, options.timeout); + + // collect output + c.stdout.on('data', function(data) { + attachment.data.push(data); + }); + + // concat output + c.stdout.on('end', function() { + clearTimeout(kill); + attachment.data = Buffer.concat(attachment.data); + }); + + // convert finish + c.on('close', function(code) { + // store exit code + data.code = code; + data.target.doc.couchmagick[data.target.id][data.target.name].code = data.code; + + if (code === 0) { + // no error: make multipart request + return 
db.multipart.insert(data.target.doc, [attachment], params, function(err, response) { + if (err) { + return callback(err); + } + + data.response = response; + if (response.rev) { + data.target.rev = response.rev; + } + + callback(null, data); + }); + } + + // store error + data.error = Buffer.concat(cerror).toString(); + data.target.doc.couchmagick[data.target.id][data.target.name].error = data.error; + + // store document stup, discard attachment + db.insert(data.target.doc, data.target.id, function(err, response) { + if (err) { + return callback(err); + } + + data.response = response; + if (response.rev) { + data.target.rev = response.rev; + } + + callback(null, data); + }); + }); + + + // request attachment and pipe it into convert process + db.attachment.get(data.source.id, data.source.name).pipe(c.stdin); + }); + } + + + // processing queue + var convert = async.queue(convertAttachment, options.concurrency); + + + return _.pipeline( + // gather configs + _.map(function(data) { + if (data.stream === 'compile') { + var cfg = {}; + cfg[data.id] = data.doc; + + _.extend(cfg, configs); + } + return data; + }), + + // Decide whether a whole doc needs processing at all + _.filter(function(data) { + if (!data.doc) { + return false; + } + if (!data.doc._attachments) { + return false; + } + + if (!Object.keys(data.doc._attachments).length) { + return false; + } + + return true; + }), + + // split stream into each config + // TODO: this prevents us from supporting multiple attachments per document + // and therefore needs serialisation + _.map(function(data) { + return Object.keys(configs).map(function(config) { + return { + db_name: data.db_name, + seq: data.seq, + doc: data.doc, + config: configs[config] + }; + }); + }), + _.flatten(), + + // Decide if couchmagick should be run on a specific attachment + _.filter(function(data) { + if (typeof data.config.filter === 'function' && !data.config.filter(data.doc)) { + return false; + } + + return true; + }), + + // split 
stream into attachments + // TODO: this prevents us from supporting multiple attachments per document + // and therefore needs serialisation + _.map(function(data) { + return Object.keys(data.doc._attachments).map(function(name) { + return { + db_name: data.db_name, + seq: data.seq, + doc: data.doc, + config: data.config, + name: name + }; + }); + }), + _.flatten(), + + // filter attachments with builtin + _.filter(function(data) { + if (!data.doc) { + return false; + } + if (!data.name) { + return false; + } + + return data.doc._attachments[data.name].content_type.match(/^image\//); + }), + + // split stream into versions + // TODO: this prevents us from supporting multiple attachments per document + // and therefore needs serialisation + _.map(function(data) { + return Object.keys(data.config.versions).map(function(key) { + var version = data.config.versions[key]; + + // version defaults + version.id = version.id || '{id}/{version}'; + version.name = version.name || '{basename}-{version}{extname}'; + version.content_type = version.content_type || 'image/jpeg'; + version.args = version.args || []; + + // first arg is input pipe + if (!version.args[0] || version.args[0] !== '-') { + version.args.unshift('-'); + } + // last arg is output pipe + if (version.args.length < 2 || !version.args[version.args.length - 1].match(/^[a-z]{0,3}:-$/)) { + version.args.push('jpg:-'); + } + + // run version filter + if (typeof version.filter === 'function' && !version.filter(data.doc, data.name)) { + return; + } + + // construct target doc + var id = strformat(version.id, { + id: data.doc._id, + parts: data.doc._id.split('/'), + version: key + }); + var name = strformat(version.name, { + id: data.doc._id, + parts: data.doc._id.split('/'), + version: key, + + name: data.name, + extname: path.extname(data.name), + basename: path.basename(data.name, path.extname(data.name)), + dirname: path.dirname(data.name) + }); + + + return { + db_name: data.db_name, + seq: data.seq, + source: { + 
id: data.doc._id, + name: data.name, + revpos: data.doc._attachments[data.name].revpos, + couchmagick: data.doc.couchmagick + }, + args: version.args, + target: { + id: id, + name: name, + content_type: version.content_type + } + }; + }); + }), + _.flatten(), + + + // filter derived versions to prevent cascades + // eg: + // single-attachment/thumbnail + // single-attachment/thumbnail/thumbnail + // single-attachment/thumbnail/thumbnail/thumbnail + _.filter(function(data) { + var derivative = data.source.couchmagick && + data.source.couchmagick[data.source.id] && + data.source.couchmagick[data.source.id][data.source.name] && + data.source.couchmagick[data.source.id][data.source.name].id; + + return !derivative; + }), + + + // process attachments + _.through(function(source) { + return _(function(push, done) { + source + .on('data', function(data) { + convert.push(data, function(err, res) { + push(err, res); + }); + }) + .on('error', push) + .on('end', done); + }); + }), + + _.map(function(data) { + if (!data.response) { + return data; + } + + delete data.seq; + + return { + db_name: data.db_name, + seq: data.seq, + type: 'log', + message: 'Complete: ' + JSON.stringify(data.response) + }; + }) + ); +}; + diff --git a/package.json b/package.json index e4d6593..6c6479e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "couchmagick", - "version": "1.8.3", + "version": "2.0.0", "description": "Run ImageMagicks `convert` on CouchDB documents.", "main": "index.js", "preferGlobal": true, @@ -10,7 +10,7 @@ "url": "https://github.com/jo/couchmagick" }, "scripts": { - "jshint": "jshint -c .jshintrc index.js" + "test": "echo \"Error: no test specified\" && exit 1" }, "keywords": [ "couchdb", @@ -27,12 +27,11 @@ }, "homepage": "https://github.com/jo/couchmagick", "dependencies": { - "async": "~0.2.9", - "couchmagick-listen": "~1.6.0", - "couch-daemon-bridge": "^1.2.2", - "nano": "~4.2.1" - }, - "devDependencies": { - "jshint": "~2.3.0" + "couch-daemon": 
"^1.2.10", + "strformat": "0.0.3", + "docuri": "^1.1.0", + "async": "^0.9.0", + "nano": "^5.9.1", + "highland": "^1.25.1" } }