我需要下载~26k图像。图像列表和URL存储在csv文件中。我正在读取csv文件并尝试在循环列表时下载图像。
如果即时通讯使用小套装~1-2k它工作正常,但当我切换到全套我得到EMFILE错误。
Error: EMFILE, open 'S:\images_download\Images\189900008.jpg'
我注意到该节点尝试一次创建所有文件,这可能是问题,但我无法强制它逐个创建它。我的理解是下面的代码应该像这样工作,但显然不是。
(仅提及此代码在Windows上执行)
代码:
var csv = require("fast-csv");
var fs = require('fs');
var request = require('request');
var async = require('async');
fs.writeFile('errors.txt', '', function(){})
var downloaded = 0;
var totalImages = 0;
var files = [];
csv
.fromPath("Device_Images_List.csv")
.on("data", function(data){
files.push({device: data[0], url: data[1]})
})
.on("end", function(){
totalImages = files.length;
async.each(files, function(file, callback) {
var deviceId = file.device;
var deviceUrl = file.url;
if ( deviceId != 'DEVICE_TYPE_KEY' ) {
try {
writeStream = fs.createWriteStream('./Images/' + deviceId + '.jpg');
proxiedRequest = request.defaults({proxy: "http://proxy:8080"});
proxiedRequest(deviceUrl).pipe(writeStream);
writeStream.on('open', function(fd) {
var rem = proxiedRequest.get(deviceUrl);
rem.on('data', function(chunk) {
writeStream.write(chunk);
});
rem.on('end', function() {
downloaded++;
console.log('Downloaded: ' + deviceId + '; ' + (downloaded + 1) + ' of ' + totalImages);
writeStream.end();
});
});
writeStream.on('close', function(){
callback();
});
} catch (ex) {
fs.appendFile('errors.txt', deviceId + ' failed to download', function (err) {
callback();
});
}
}
}, function(err){
if( err ) {
console.log(err);
} else {
}
});
});
答案 0 :(得分:2)
正如@slebetman评论的那样,可以通过使用async.eachSeries逐个处理文件或async.eachLimit来限制并行节点来解决问题:
async.eachLimit(files, 5, function(file, callback) {
// ... Process 5 files at the same time
}, function(err){
});