Nodejs下载多个文件

时间:2015-06-02 04:45:03

标签: node.js image download

我需要下载~26k图像。图像列表和URL存储在csv文件中。我正在读取csv文件并尝试在循环列表时下载图像。

如果即时通讯使用小套装~1-2k它工作正常,但当我切换到全套我得到EMFILE错误。

Error: EMFILE, open 'S:\images_download\Images\189900008.jpg'

我注意到该节点尝试一次创建所有文件,这可能是问题,但我无法强制它逐个创建它。我的理解是下面的代码应该像这样工作,但显然不是。

(仅提及此代码在Windows上执行)

代码:

var csv     = require("fast-csv");
var fs      = require('fs');
var request = require('request');
var async   = require('async');

fs.writeFile('errors.txt', '', function(){})

var downloaded = 0;
var totalImages = 0;
var files = [];

csv
 .fromPath("Device_Images_List.csv")
 .on("data", function(data){
    files.push({device: data[0], url: data[1]})
 })
 .on("end", function(){     
    totalImages = files.length;

    async.each(files, function(file, callback) {
        var deviceId = file.device;
        var deviceUrl = file.url;   

        if ( deviceId != 'DEVICE_TYPE_KEY' ) {
                try {
                    writeStream = fs.createWriteStream('./Images/' + deviceId + '.jpg');
                    proxiedRequest = request.defaults({proxy: "http://proxy:8080"});
                    proxiedRequest(deviceUrl).pipe(writeStream); 
                    writeStream.on('open', function(fd) {

                        var rem = proxiedRequest.get(deviceUrl);

                        rem.on('data', function(chunk) {        
                            writeStream.write(chunk);       
                        });
                        rem.on('end', function() {
                            downloaded++;
                            console.log('Downloaded: ' + deviceId + '; ' + (downloaded + 1) + ' of ' + totalImages);
                            writeStream.end();                      
                        });

                    });

                    writeStream.on('close', function(){
                        callback();
                    });

                } catch (ex) {
                    fs.appendFile('errors.txt', deviceId + ' failed to download', function (err) {
                        callback();
                    });
                }
        }       
    }, function(err){
        if( err ) {
          console.log(err);
        } else {

        }
    }); 
});

1 个答案:

答案 0 :(得分:2)

正如@slebetman评论的那样,可以通过使用async.eachSeries逐个处理文件或async.eachLimit来限制并行节点来解决问题:

async.eachLimit(files, 5, function(file, callback) {
 // ... Process 5 files at the same time
}, function(err){
});