You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
96 lines
3.3 KiB
CoffeeScript
96 lines
3.3 KiB
CoffeeScript
fs = require "fs"
|
|
path = require "path"
|
|
Promise = require "bluebird"
|
|
_ = require "lodash"
|
|
iaHeaders = require "ia-headers"
|
|
moment = require "moment"
|
|
streamLength = require "stream-length"
|
|
bhttp = require "bhttp"
|
|
debug = require("debug")("pdfy:task:mirror")
|
|
|
|
# Delay, in milliseconds, before a rescheduled upload is attempted again (passed to the retry timer in retryTask; logged there in seconds).
retryTimeout = 20 * 1000
|
|
|
|
# Re-runs the mirror task after a pause of `retryTimeout` milliseconds.
#
# Used when the Internet Archive asks us to slow down (see the callers in runTask).
#
# task    - the task object, passed through unchanged to runTask
# context - the task context, passed through unchanged to runTask
#
# Returns a promise that settles exactly like the re-run `runTask` call does.
retryTask = (task, context) ->
  debug("received 'Slow Down' from IA, rescheduling upload in #{retryTimeout / 1000} seconds...")

  # Bluebird's Promise.delay yields a timer-backed promise directly, so there is no need for a
  # hand-rolled `new Promise` + `setTimeout` wrapper that forwards resolve/reject manually.
  Promise.delay(retryTimeout).then ->
    runTask task, context
|
|
|
|
# Builds the metadata object (later converted to request headers by iaHeaders) for the mirrored item.
#
# document   - the document record; read through `document.get`
# collection - the Internet Archive collection name to file the item under
#
# Returns a plain object with subject, mediatype, collection, date, title and description.
buildMirrorMetadata = (document, collection) ->
  uploadDate = moment(document.get 'Uploaded')
  originalFilename = document.get 'OriginalFilename'
  slugId = document.get 'SlugId'

  # The original filename is supplied by the uploader, so it must be HTML-escaped before it is
  # embedded in the HTML description.
  escapedFilename = _.escape originalFilename

  # A regex with the `g` flag is required here: a plain string pattern would only remove the
  # first newline and leave the rest of the line breaks in the value.
  description = """
    <p>
      <strong>This public document was automatically mirrored from <a href="https://pdf.yt/">PDFy</a>.</strong>
    </p>

    <ul>
      <li><strong>Original filename:</strong> #{escapedFilename}</li>
      <li><strong>URL:</strong> <a href="https://pdf.yt/d/#{slugId}">https://pdf.yt/d/#{slugId}</a></li>
      <li><strong>Upload date:</strong> #{uploadDate.format 'MMMM D, YYYY HH:mm:ss'}</li>
    </ul>
  """.replace(/\n/g, "")

  return {
    subject: ["mirror", "pdf.yt"]
    mediatype: "texts"
    collection: collection
    date: uploadDate.format 'YYYY-MM-DD'
    title: "#{originalFilename} (PDFy mirror)"
    description: description
  }

# Mirrors a single public document to the Internet Archive.
#
# Steps: look up the document by `task.id`, refuse non-public documents, open the stored file,
# ask the archive whether we are over the upload limit, and either reschedule (via retryTask) or
# upload the file with a PUT request. On success the document's "Mirrored" field is set to 1; on
# any failure after the document was loaded it is set to 2 and the original error is re-raised.
#
# task    - task object; `task.id` is read, `task.extraHeaders` is optional, and `task.document`
#           and `task.stream` are assigned here as a side effect
# context - provides `context.db`, `context.config.storage_path` and `context.config.ia`
#
# Returns a promise that resolves once the document has been saved, or rejects with the error
# that made the mirroring fail.
runTask = (task, context) ->
  Promise.try ->
    context.db.model("Document").getOneWhere SlugId: task.id
  .then (document) ->
    task.document = document

    if document.get("Public") != 1
      return Promise.reject new Error "Unlisted documents cannot be mirrored."

    task.stream = fs.createReadStream path.join(context.config.storage_path, document.get('Filename'))

    Promise.all [
      streamLength task.stream
      bhttp.get "http://s3.us.archive.org/?check_limit=1", decodeJSON: true
    ]
  .spread (filesize, limitResponse) ->
    if limitResponse.body.over_limit == 1
      # The rescheduled run opens its own read stream, so release this unused one instead of
      # leaving its file handle open for the whole retry delay.
      task.stream.destroy()
      return retryTask task, context

    Promise.try ->
      if context.config.ia.collection == "test_collection"
        # This is to make sure that we can repeatedly test with the same document while developing, without clobbering the same identifier over and over again.
        slug = "#{task.document.get 'SlugId'}-#{Math.round(Math.random() * 1000000)}"
      else
        slug = task.document.get "SlugId"

      identifier = "pdfy-#{slug}"
      metadata = buildMirrorMetadata task.document, context.config.ia.collection

      # Later sources win, so task-specific extra headers can override the defaults.
      # TODO: Replace this with a more light-weight extend module such as xtend?
      headers = _.extend {}, iaHeaders(metadata),
        'x-archive-auto-make-bucket': 1
        'x-archive-size-hint': filesize
        'authorization': "LOW #{context.config.ia.access_key}:#{context.config.ia.secret_key}"
      , (task.extraHeaders ? {})

      # NOTE(review): the access/secret key pair is sent over plain http:// here; consider
      # switching this endpoint to https:// after confirming it works with the deployed setup.
      bhttp.put "http://s3.us.archive.org/#{identifier}/#{encodeURIComponent(task.document.get 'OriginalFilename')}", task.stream, headers: headers
    .then (response) ->
      switch response.statusCode
        when 200 then Promise.resolve()
        when 503 then retryTask task, context
        else Promise.reject new Error "Received a non-200 response from IA (#{response.statusCode})"
  .then ->
    task.document.set "Mirrored", 1
    task.document.saveChanges()
  .catch (err) ->
    # If the lookup itself failed there is no document to flag; re-raise the real error instead
    # of masking it with a TypeError from dereferencing a missing document.
    throw err unless task.document?

    # Same unchained set/save sequence as the success path, so this does not rely on `set`
    # returning the document.
    task.document.set "Mirrored", 2
    task.document.saveChanges()
      .then -> Promise.reject err
|
|
|
|
|
|
# The module's sole export is the task runner: a function taking (task, context).
module.exports = runTask
|