将 zip 文件分段上传到 AWS Glacier 时中途卡住

时间:2017-01-11 01:25:48

标签: node.js amazon-web-services buffer amazon-glacier

我尝试使用 Node 版 aws-sdk 的 multipart upload 相关函数,将一个 600MB 的 .zip 文件上传到 Glacier。我弄清楚了如何把文件读成缓冲区,并使用 AWS 文档中的示例脚本开始上传。

脚本会为文件的每个部分启动上传,但每个部分都会因400错误而失败。

Uploading part 0 = bytes 0-2097151/*
Uploading part 2097152 = bytes 2097152-4194303/*
Uploading part 4194304 = bytes 4194304-6291455/*
Uploading part 6291456 = bytes 6291456-8388607/*
....
Uploading part 591396864 = bytes 591396864-591798963/*
//stops logging, then a couple seconds later, it starts returning an error message like this for each upload part:

{ [UnknownError: 400]
  message: '400',
  code: 'UnknownError',
  statusCode: 400,
  time: Tue Jan 10 2017 20:54:29 GMT-0500 (EST),
  requestId: 'F16FEDE011D3039A',
  retryable: false,
  retryDelay: 91.54012566432357 }

以下是我使用的上传脚本:
var AWS = require('aws-sdk');
var creds = <path to creds>
var fs =  require('fs');
var filePath = <path to file>;
var encoding = "utf8";

var myConfig = new AWS.Config({
  accessKeyId: creds.AccessKeyID,
  secretAccessKey: creds.SecretAccessKey,
  region: 'us-west-1'
});

var glacier = new AWS.Glacier(myConfig)

var buffer = fs.readFileSync(filePath);
// var buffer = new Buffer(2.5 * 1024 * 1024); // 2.5MB buffer
var partSize = 1024 * 1024; // 1MB chunks,
var numPartsLeft = Math.ceil(buffer.length / partSize);
var startTime = new Date();

var params = {
  accountId: '-',
  vaultName: <vault name>
  archiveDescription: '100media',
  partSize: partSize.toString(),
};

// Compute the complete SHA-256 tree hash so we can pass it
// to completeMultipartUpload request at the end
var treeHash = glacier.computeChecksums(buffer).treeHash;

// Initiate the multipart upload
console.log('Initiating upload to', params.vaultName);
glacier.initiateMultipartUpload(params, function (mpErr, multipart) {
    if (mpErr) { console.log('Error!', mpErr.stack); return; }
    console.log("Got upload ID", multipart.uploadId);

    // Grab each partSize chunk and upload it as a part
    for (var i = 0; i < buffer.length; i += partSize) {
        var end = Math.min(i + partSize, buffer.length),
            partParams = {
                vaultName: params.vaultName,
                uploadId: multipart.uploadId,
                range: 'bytes ' + i + '-' + (end-1) + '/*',
                body: buffer.slice(i, end)
            };

        // Send a single part
        console.log('Uploading part', i, '=', partParams.range);
        glacier.uploadMultipartPart(partParams, function(multiErr, mData) {
            if (multiErr) return;
            console.log("Completed part", this.request.params.range);
            if (--numPartsLeft > 0) return; // complete only when all parts uploaded

            var doneParams = {
                vaultName: params.vaultName,
                uploadId: multipart.uploadId,
                archiveSize: buffer.length.toString(),
                checksum: treeHash // the computed tree hash
            };

            console.log("Completing upload...");
            glacier.completeMultipartUpload(doneParams, function(err, data) {
                if (err) {
                    console.log("An error occurred while uploading the archive");
                    console.log(err);
                } else {
                    var delta = (new Date() - startTime) / 1000;
                    console.log('Completed upload in', delta, 'seconds');
                    console.log('Archive ID:', data.archiveId);
                    console.log('Checksum:  ', data.checksum);
                }
            });
        });
    }
});

如果有人知道这个 400 错误来自哪里,我将不胜感激!我以前没有处理过缓冲区或二进制数据,所以可能是我把格式弄错了。另一个可能的原因是我构造 Glacier 请求的方式有误。

1 个答案:

答案 0 :(得分:0)

这是我创建的一个脚本,它一次只尝试上传一个分段。之后可以再把并发加回来,但它按原样就能工作,并且在某个分段上传失败时会重试:

var minm = require('minimist');

var argv = require('minimist')(process.argv.slice(2));
var AWS = require('aws-sdk');
var creds = <path to local json creds>
var fs =  require('fs');
var encoding = "utf8";
var partSize = 1024 * 1024; // 1MB chunks,
var startTime = new Date();
var byteIncrementer = 0;
var MBcounter = 0;
var multipart;

//move these out to args
var filePath = argv.filepath;
var vaultName = argv.vaultname 
var archiveDescription = argv.description

if (!filePath) {
    throw "ERROR: must pass file path via --filepath <filepath>"
}

if (!archiveDescription) {
    throw "ERROR: must pass description path via --description <description>"
}

var myConfig = new AWS.Config({
  accessKeyId: creds.AccessKeyID,
  secretAccessKey: creds.SecretAccessKey,
  region: <region>
});
var params = {
  accountId: '-',
  vaultName: vaultName,
  archiveDescription: archiveDescription,
  partSize: partSize.toString(),
};

var buffer = fs.readFileSync(filePath);
var numPartsLeft = Math.ceil(buffer.length / partSize);
var glacier = new AWS.Glacier(myConfig)
var treeHash = glacier.computeChecksums(buffer).treeHash;

new Promise(function (resolve, reject) {
    glacier.initiateMultipartUpload(params, function (mpErr, multi) {
        if (mpErr) { console.log('Error!', mpErr.stack); return; }
        console.log("Got upload ID", multi.uploadId);
        multipart = multi
        resolve();
    });
}).then(function () {
    console.log("total upload size: ", buffer.length);
    recursivelyUploadPart(byteIncrementer)
}).catch(function (err) {console.log(err)});

function recursivelyUploadPart() {
    var end = Math.min(byteIncrementer + partSize, buffer.length);

    var partParams = {
        accountId: '-',
        uploadId: multipart.uploadId,
        vaultName: params.vaultName,
        range: 'bytes ' + byteIncrementer + '-' + (end-1) + '/*',
        body: buffer.slice(byteIncrementer, end)
    };

    console.log('Uploading part', byteIncrementer, '=', partParams.range);
    glacier.uploadMultipartPart(partParams, function(multiErr, mData) {
        if (multiErr) {
            console.log('part upload error: ', multiErr)
            console.log('retrying')
            return recursivelyUploadPart(byteIncrementer)
        } else {
            console.log("Completed part", this.request.params.range);

            if (--numPartsLeft > 0) {
                MBcounter++;
                console.log("MB Uploaded: ", MBcounter);
                byteIncrementer += partSize;
                console.log('recursing');
                return recursivelyUploadPart(byteIncrementer);
            } else {
                var doneParams = {
                    vaultName: params.vaultName,
                    uploadId: multipart.uploadId,
                    archiveSize: buffer.length.toString(),
                    checksum: treeHash // the computed tree hash
                };
                console.log("Completing upload...");
                glacier.completeMultipartUpload(doneParams, function(err, data) {
                    if (err) {
                        console.log("An error occurred while uploading the archive: ", err);
                    } else {
                        var delta = (new Date() - startTime) / 1000;
                        console.log('Completed upload in', delta, 'seconds');
                        console.log('Archive ID:', data.archiveId);
                        console.log('Checksum:  ', data.checksum);
                        console.log("==============================");
                        console.log('COMPLETED');
                        console.log("==============================");
                    }
                });
            }
        }
    });
};

正如评论中所提到的,问题在于我打开了大量的 HTTP 连接、试图同时上传所有分段,这是行不通的。