使用NodeJS从URL下载大文件时服务器崩溃

时间:2016-11-19 23:52:57

标签: javascript node.js http web-scraping request

我正在尝试使用 Node.js 的 request 模块从一个需要登录的 URL 下载文件(200 MB 以上),但是下载完成后,服务器开始变慢,直到崩溃或变得极其缓慢。

这是我当前的代码(它会下载整个文件,但我的服务器最终会崩溃):

//Required modules
var http = require('http'),
url = require("url"),
fs = require('fs'),
request = require('request'),
path = require("path"),
events = require("events"),
j = request.jar(),
request = request.defaults({ jar : j });

// Log in first so the session cookies are stored in the shared jar (`j`),
// then stream the protected file straight to disk.
console.log("downloading file :)");
request({
    url:"http://example.com/",
    method:"POST",
    form:{u: "username",p: "password"}
    },
    function(error,response,body){
        // Without a successful login the download below would only fetch an
        // error/login page, so stop here instead of writing a bogus file.
        if (error) {
            console.log(error);
            return;
        }

        // NOTE(review): the 1s delay is kept from the original; the callback
        // already runs after the login response arrived, so it is presumably
        // unnecessary - confirm before removing.
        setTimeout(function(){
            var target = fs.createWriteStream("/var/www/filesDir/CustomName.zip");

            // An 'error' event without a listener is thrown and takes the
            // whole process down, so disk/permission failures must be handled.
            target.on('error', function(err) {
                console.log(err);
            });
            target.on('finish', function() {
                console.log("finished writing /var/www/filesDir/CustomName.zip");
            });

            request
              .get('http://example.com/test.ashx?file=15')
              .on('error', function(err) {
                console.log(err);
                // pipe() does not close the destination when the source
                // fails; end it ourselves so the file handle is released.
                target.end();
              })
              .pipe(target);

            console.log(body);
        },1000)
    }
);

我还尝试过应用另一个回答(this answer)中的解决方案,但由于某种原因,文件没有被正确下载,程序只显示“Download progress: 0 bytes”(下载进度:0 字节),可能与登录访问有关。

下面是我根据上面提到的那个回答尝试实现的另一段代码:

var http = require('http');
var fs = require('fs');
var url = require("url");
var request = require('request');
var path = require("path");
var events = require("events");
var j = request.jar();
var request = request.defaults({ jar : j });


// Log in through `request` (cookies are stored in jar `j`), then download the
// file with the core http module while printing the byte count every second.
request({
    url:"http://example.com/",
    method:"POST",
    form:{u:"username",p:"password"}
    },  function(error,response,body){
                if (error) {
                    console.log(error);
                    return;
                }

                var downloadfile = "http://example.com/test.ashx?file=15";
                var target = url.parse(downloadfile);
                var filename = "1977.zip";
                var dlprogress = 0;

                // http.request knows nothing about the `request` cookie jar,
                // so the session cookies have to be forwarded by hand.
                var headers = {};
                var cookies = j.getCookieString(downloadfile);
                if (cookies) {
                    headers['Cookie'] = cookies;
                }

                // `path` must be given explicitly; without it http.request
                // fetches "/" instead of the download URL.
                var req = http.request({
                    port: target.port || 80,
                    host: target.hostname,
                    path: target.path,
                    method: 'GET',
                    headers: headers
                });

                var progressTimer = setInterval(function () {
                   console.log("Download progress: " + dlprogress + " bytes");
                }, 1000);

                req.on('error', function (err) {
                    clearInterval(progressTimer);
                    console.log(err);
                });

                req.on('response', function (res) {
                    // Default 'w' flag: no Range header is sent, so the server
                    // always returns the file from byte 0 and appending to an
                    // existing file would corrupt it.
                    var fileStream = fs.createWriteStream(filename);
                    console.log("File size " + filename + ": " + res.headers['content-length'] + " bytes.");

                    // Only used for the progress report; the actual writing is
                    // done by pipe() below.
                    res.on('data', function (chunk) {
                        dlprogress += chunk.length;
                    });

                    fileStream.on('error', function (err) {
                        clearInterval(progressTimer);
                        console.log(err);
                        // Stop pulling data we can no longer store.
                        res.destroy();
                    });
                    fileStream.on('finish', function () {
                        // Stop the progress timer so it does not keep the
                        // process alive and logging forever.
                        clearInterval(progressTimer);
                        console.log("Finished downloading " + filename);
                    });

                    // pipe() applies backpressure, so a large body is not
                    // buffered in memory when the disk is slower than the
                    // network.
                    res.pipe(fileStream);
                });

                console.log("Downloading file: " + filename);
                console.log("Before download request");
                req.end();
        }
);

无论你选择基于上面哪一种方式来帮助我都可以。

1 个答案:

答案 0 :(得分:1)

我最终这样做了,我已多次测试代码,服务器不再崩溃了:

var request = require('request');
var filed = require('filed');
var j = request.jar();
var request = request.defaults({ jar : j });

// Log in so the cookie jar holds the session, then stream the download into
// a local file through `filed`.
request({
    url: "http://example.com/login",
    method:"POST",
    // 'u' and 'p' are the field names on the form
    form:{u:"username",p:"password"}
    }, function(error,response,body){
        // A failed login means the download would not be authorised.
        if (error) {
            console.log(error, 'error logging in');
            return;
        }

        // NOTE(review): the 3s delay is kept from the original; this callback
        // already runs after the login response, so it is presumably not
        // needed - confirm before removing.
        setTimeout(function(){
            var downloadURL = 'http://example.com/download/file.zip';
            var downloadPath = "/path/to/download/localNameForFile.zip";

            var downloadFile = filed(downloadPath);
            var download = request(downloadURL);

            // pipe() returns its destination, so listeners for incoming data
            // must be attached to the request itself, not to pipe()'s result.
            download.on('data', function(data) {
                console.log('binary data received');
            });
            // Network failures are emitted on the request, not on the file.
            download.on('error', function (err) {
                console.log(err, 'error downloading file');
            });

            downloadFile.on('end', function () {
                console.log(downloadPath, 'file downloaded to path');
            });

            downloadFile.on('error', function (err) {
                console.log(err, 'error downloading file');
            });

            download.pipe(downloadFile);
        },3000)
    }
);