异步下载许多文件

时间:2018-09-19 12:04:35

标签: node.js asynchronous download

我想下载this xml中包含的所有mp3文件,因此我使用Node.js和JavaScript创建了以下代码:

var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();
var request = require('request');

const xmlUrl = 'https://deejayreloadedpodcast.maxxer.it/podcast/pinocchio.xml';

var download = async function(url, dest, callback) {
    // download if only the file is not existing yet
    if(!fs.existsSync(dest)) {
        await request.get(url)
        .on('error', function(err) {
            console.log(err);
        })
        .pipe(fs.createWriteStream(dest))
        .on('close', callback); 
    }
};

https.get(xmlUrl, function(res) {
    var response_data = '';
    res.setEncoding('utf8');

    res.on('data', function(chunk) {
        response_data += chunk;
    });

    res.on('end', function() {
        parser.parseString(response_data, function(err, result) {
            if(err) {
                console.log('Got error: ' + err.message);
            } 
            else {
                var json = JSON.stringify(result, null, 2);

                var channels = result['rss']['channel'];
                var items = channels[0]['item'];

                var urlsTemp = [];
                var namesTemp = [];
                for(var elem in items) {
                    var obj = items[elem];
                    var name = obj['title'][0];
                    var url = obj['enclosure'][0]['$']['url'];
                    urlsTemp.push(url);
                    namesTemp.push(name);
                }

                var urls = [];
                var names = [];
                for(i in urlsTemp) {
                    urls.push(urlsTemp[i]);
                    names.push(namesTemp[i]);
                }

                for(var i = 10; i < 20/*urls.length*/; i++) {
                    var dirPath = './puntate/';
                    var filename =  names[i] + '.mp3';
                    download(urls[i], dirPath + filename, function() {
                        console.log('Finished downloading \'' + filename);
                    });
                }

            }
        });
    });

    res.on('error', function(err) {
        console.log('Got error: ' + err.message);
    });
});

此代码获取XML文件的内容,通过将链接和文件名保存在两个数组(urlsnames)中进行处理,然后下载音频文件。 问题在于,仅一次下载几个mp3(在本示例中只有10个)时,它才有效。 如果我让它从0循环到数组urls的整个长度,则该程序将不再起作用。它不会产生错误,但是会保存大小为0(即空)的所有mp3。

为什么?我以为问题是异步代码,但是我在async/await方法中使用了download。 有什么问题吗?

谢谢


var i = 0; 
var dirPath = './puntate/';
var filename = names[i] + '.mp3';
var fn = function(i) {
    console.log('(A)', i, urls.length);
    download(urls[i], dirPath + filename, function() {
        console.log('Finished downloading \'' + filename);
        console.log('(B)', i, urls.length);
        if(i < urls.length) { 
            i++;
            console.log('(C)', i, urls.length);
            fn(i);
        } 
    });
}
fn(i);

和:

(A) 0 3095
Finished downloading 'Puntata del 17 Settembre 2018.mp3
(B) 0 3095
(C) 1 3095
(A) 1 3095

1 个答案:

答案 0 :(得分:0)

我建议您修改for循环,因为它提供了同步功能:

for(var i = 10; i < 20/*urls.length*/; i++) {
   var dirPath = './puntate/';
   var filename =  names[i] + '.mp3';
   download(urls[i], dirPath + filename, function() {
       console.log('Finished downloading \'' + filename);
   });
}

为连续传递样式:

   var i=0; /*i starts from 0*/
   var dirPath = './puntate/';


   var fn=function(i){
          var filename =  names[i] + '.mp3';
          download(urls[i], dirPath + filename, function() {
             console.log('Finished downloading \'' + filename);
             /*if not finish downloading all the links*/
             if(i<urls.length){ 
               i++;
               fn(i);
             } 
          });
       }   
       fn(i);  

以下是增强的代码版本:

改进:

  • 删除了不必要的for循环
  • 如果文件已存在,请跳过它,直到下一个不存在并打印。

var https = require('https');
var fs = require('fs');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();
var request = require('request');
var urls = [];
var names = [];
const xmlUrl = 'https://deejayreloadedpodcast.maxxer.it/podcast/pinocchio.xml';


var download = async function(url, dest, callback) {

    request.get(url)
    .on('error', function(err) {
        console.log(err);
    })
    .pipe(fs.createWriteStream(dest))
    .on('close', callback); 
};

https.get(xmlUrl, function(res) {
    var response_data = '';
    res.setEncoding('utf8');

    res.on('data', function(chunk) {
        response_data += chunk;
    });

    res.on('end', function() {
        parser.parseString(response_data, function(err, result) {
            if(err) {
                console.log('Got error: ' + err.message);
            } 
            else {
                var json = JSON.stringify(result, null, 2);

                var channels = result['rss']['channel'];
                var items = channels[0]['item'];

               // var urlsTemp = []; //you don't need both of temp arrays
               // var namesTemp = []; //push items directly into urls[] and names[]
                for(var elem in items) {
                    var obj = items[elem];
                    var name = obj['title'][0];
                    var url = obj['enclosure'][0]['$']['url'];
                    urls.push(url);
                    names.push(name);
                }

                var i = 0;
                var dirPath = './puntate/';



                var fn = function(i) {
                    var filename = names[i] + '.mp3';
                    var fileExist=fs.existsSync(dirPath + filename);

                    // skip downloading if the file exists already
                    if(fileExist){
                        console.log('File exists', i, urls.length);
                        i++;
                        fn(i);                      
                    }else{  // download if only the file is not existing yet                    
                        console.log('(A)', i, urls.length);
                        download(urls[i], dirPath + filename, function() {
                            console.log('Finished downloading \'' + filename);
                            console.log('(B)', i, urls.length);
                            if(i < urls.length) { 
                                i++;
                                console.log('(C)', i, urls.length);
                                fn(i);
                            } 
                        });                     
                    }

                }
                fn(i);              

            }
        });
    });

    res.on('error', function(err) {
        console.log('Got error: ' + err.message);
    });
});