I'm writing a backup script that simply downloads all the blobs in all the blob containers of a specific Azure account.
The script uses async.js to make sure only so many threads can run at the same time, so it doesn't overload the server. When I run this script it works fine, but when it hits large files it runs out of memory. I'm guessing the download runs faster than the disk can write, and it eventually fills up the in-memory buffers so badly that I run out of memory entirely, but debugging the exact cause has been impossible so far.
The specific function that appears to use a lot of memory is called as follows:
blobService.getBlobToStream(
    containerName,
    blob.name,
    fs.createWriteStream(fullPath),
    function(error) {
        if (error) { //Something went wrong, write it to the console but finish the queue item and continue.
            console.log("Failed writing " + blob.name + " (" + error + ")");
            callback();
        }
        else { //Write the last modified date and finish the queue item silently
            fs.writeFile(fullPath + ".date", blobLastModified, function(err) {
                if (err) console.log("Couldn't write .date file: " + err);
            });
            callback();
        }
    });
Even a download of a mere 700MB will easily push my memory footprint past 1GB.
Is there any way around this? Am I missing a parameter that would magically prevent the Azure SDK from buffering everything and the kitchen sink?
Full code:
#!/usr/bin/env node
//Requires
var azure = require('azure');
var fs = require('fs');
var mkdirp = require('mkdirp');
var path = require('path');
var async = require('async');

var maxconcurrency = 1; //Max amount of simultaneous running threads of getBlobsAndSaveThem() running through async.js.

var blobService = azure.createBlobService();

var backupPrefix = '/backups/azurebackup/'; //Always end with a '/'!!

//Main flow of the script is near the bottom of the file.
var containerProcessingQueue = async.queue(
    function getBlobsAndSaveThem(containerName) {
        console.log(containerName); //DEBUG
        blobService.listBlobs(containerName,
            function(error, blobs) {
                if (!error) {
                    var blobProcessingQueue =
                        async.queue(function(index, callback) {
                            var blob = blobs[index];
                            console.log(blob); //DEBUG
                            var fullPath = backupPrefix + containerName + '/' + blob.name;
                            var blobLastModified = new Date(blob.properties['last-modified']);

                            //Only create if the directory doesn't exist, since mkdirp fails if the directory exists.
                            if (!fs.existsSync(path.dirname(fullPath))) { //And do it sync, because otherwise it'll check 99999 times if the directory exists simultaneously, doesn't find it, then fails to create it 99998 times.
                                try {
                                    mkdirp.sync(path.dirname(fullPath)); //mkdirp.sync takes no callback; it throws on failure.
                                } catch (err) {
                                    console.log('Failed to create directory ' + path.dirname(fullPath) + " (" + err + ")");
                                }
                            }

                            if (fs.existsSync(fullPath + ".date")) {
                                if (blobLastModified.toString() == fs.readFileSync(fullPath + ".date").toString()) {
                                    callback();
                                    return; //If the file is unmodified, return. No this won't exit the program, because it's called within a function definition (async.queue(function ...))
                                }
                            }

                            blobService.getBlobToStream(
                                containerName,
                                blob.name,
                                fs.createWriteStream(fullPath),
                                function(error) {
                                    if (error) { //Something went wrong, write it to the console but finish the queue item and continue.
                                        console.log("Failed writing " + blob.name + " (" + error + ")");
                                        callback();
                                    }
                                    else { //Write the last modified date and finish the queue item silently
                                        fs.writeFile(fullPath + ".date", blobLastModified, function(err) {
                                            if (err) console.log("Couldn't write .date file: " + err);
                                        });
                                        callback();
                                    }
                                });
                        }, maxconcurrency);

                    //Push new items to the queue for processing
                    for (var blobindex in blobs) {
                        blobProcessingQueue.push(blobindex);
                    }
                }
                else {
                    console.log("An error occurred listing the blobs: " + error);
                }
            });
    }, 1);

blobService.listContainers(function(err, result) {
    for (var i = 0; i < result.length; i++) {
        containerProcessingQueue.push(result[i].name);
    }
});
Answer 0 (Score: 2)
For everyone who's curious now: the start and end options have since been renamed. They are now just rangeStart and rangeEnd. Here are the azure node docs for more help: http://dl.windowsazure.com/nodestoragedocs/BlobService.html
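For example, a ranged call with the renamed options would look roughly like this (a minimal sketch; containerName, blobName, writeStream, startPos and endPos are just placeholders, and the call shape mirrors the chunked example in the answer below):

blobService.getBlobToStream(containerName, blobName, writeStream,
    { "rangeStart": startPos, "rangeEnd": endPos - 1 }, //Renamed options; only this byte range is fetched.
    function(error) {
        if (error) console.log("Ranged download failed: " + error);
    });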
Answer 1 (Score: 1)
One thing you could do is read just a chunk of data into the stream instead of the whole blob data, append that to the file, and then read the next chunk. The Blob Storage service supports this. If you look at the source code of getBlobToStream (https://github.com/WindowsAzure/azure-sdk-for-node/blob/master/lib/services/blob/blobservice.js), you can specify from/to bytes in the options - rangeStartHeader and rangeEndHeader. See if that helps.
I've hacked up some code (as you can tell from it, my understanding of node.js is pretty primitive :)). [Please use this code just to get an idea of how you can do chunked downloads, as I believe it still has some issues]
var azure = require('azure');
var fs = require('fs');

var blobService = azure.createBlobService("account", "accountkey");
var containerName = "container name";
var blobName = "blob name";
var blobSize;
var chunkSize = 1024 * 512; //chunk size -- we'll read 512 KB at a time.
var startPos = 0;
var fullPath = "D:\\node\\";

blobService.getBlobProperties(containerName, blobName, null, function (error, blob) {
    if (error) {
        throw error;
    }
    else {
        blobSize = blob.contentLength;
        fullPath = fullPath + blobName;
        console.log(fullPath);
        doDownload();
    }
});

function doDownload() {
    var stream = fs.createWriteStream(fullPath, {flags: 'a'}); //Append mode, so each chunk lands after the previous one.
    var endPos = startPos + chunkSize;
    if (endPos > blobSize) {
        endPos = blobSize;
    }
    console.log("Downloading " + (endPos - startPos) + " bytes starting from " + startPos + " marker.");
    blobService.getBlobToStream(containerName, blobName, stream,
        { "rangeStartHeader": startPos, "rangeEndHeader": endPos - 1 }, function (error) {
            if (error) {
                throw error;
            }
            else {
                startPos = endPos;
                if (startPos <= blobSize - 1) {
                    doDownload(); //More bytes remain, fetch the next chunk.
                }
            }
        });
}
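As a rough sketch of how this could plug into your queue, the same chunk loop can be parameterized with a completion callback so it can serve as the per-blob worker in your async.queue (downloadBlobInChunks is just an illustrative wrapper I'm making up here, untested like the rest; it assumes the blobService and fs from above):

function downloadBlobInChunks(containerName, blobName, fullPath, blobSize, done) {
    var chunkSize = 1024 * 512; //512 KB per ranged request, as above.
    var startPos = 0;
    function nextChunk() {
        if (startPos >= blobSize) return done(null); //All chunks written; signal the queue.
        var endPos = Math.min(startPos + chunkSize, blobSize);
        var stream = fs.createWriteStream(fullPath, {flags: 'a'}); //Append each chunk to the same file.
        blobService.getBlobToStream(containerName, blobName, stream,
            { "rangeStartHeader": startPos, "rangeEndHeader": endPos - 1 },
            function (error) {
                if (error) return done(error); //Bubble the error up instead of throwing.
                startPos = endPos;
                nextChunk();
            });
    }
    nextChunk();
}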