来自aspx

时间:2015-12-21 11:57:20

标签: javascript asp.net node.js http download

我试图使用 Node 的 http 模块,从这个网站上的任意一个“下载”按钮下载 PDF 文档。怎样才能下载到 PDF 文档本身,而不是像我的代码现在这样下载到一个 aspx 文件?出于某种原因,我的代码下载到的是一个 aspx 文件,其内容为“错误消息 - 文件不存在,或者您无权查看此文件”,而我在网页浏览器中却可以轻松下载该文件。这是我的代码:

var pdf_text = require("pdf-text");
var request = require("request");
var http = require("http");
var fs = require("fs");
var cheerio = require("cheerio");
var urllib = require("url");

var path = "final.pdf";
var url = "http://www2.nationalgrid.com/UK/Industry-information/System-charges/Electricity-transmission/Assistance-for-areas-with-high-distribution-costs/";
var links = [];

// Load the listing page, collect the target of every ".txtLnk" anchor, then
// download the second link with the core http module.
request(url, function(error, response, html) {
        if (error || response.statusCode !== 200) {
                console.error("Could not load listing page:", error || ("HTTP " + response.statusCode));
                return;
        }

        var $ = cheerio.load(html);
        $(".txtLnk").each(function() {
                var href = $(this).attr("href");
                // Anchors without an href would otherwise yield "...undefined".
                if (href) {
                        // resolve() handles root-relative, relative and absolute hrefs alike.
                        links.push(urllib.resolve(url, href));
                }
        });

        console.log(links);

        if (links.length < 2) {
                console.error("Expected at least two download links, found " + links.length);
                return;
        }

        var target = urllib.parse(links[1]);
        var options = {
                host: target.hostname,
                port: target.port || 80,
                // "path" is pathname + query string. Sending only "pathname" drops
                // the query, so the server cannot tell which asset was requested.
                path: target.path,
                headers: {
                        "User-Agent": "Mozilla/5.0 (X11; Linux i686; rv:43.0) Gecko/201001101 Firefox/43.0"
                }
        };

        http.get(options, function(res) {
                if (res.statusCode !== 200) {
                        console.error("Download failed: HTTP " + res.statusCode);
                        // Drain the body so the socket is released.
                        res.resume();
                        return;
                }

                // Create the output file only once there is a body worth saving.
                var file = fs.createWriteStream(target.pathname.split('/').pop());
                file.on('error', function(err) {
                        console.error("Could not write download:", err);
                });
                // pipe() writes every chunk and ends the file when the response ends.
                res.pipe(file);
        }).on('error', function(err) {
                console.error("Download request failed:", err);
        });
});

/**
 * Reads "pdf/<pdf>" with pdf_text and extracts the 15 characters that follow
 * the "(p/kWh)" marker, split into a date part and a rate part.
 *
 * pdf_text is asynchronous, so the result is delivered through `callback`
 * rather than as a return value.
 *
 * @param {string} pdf - File name, looked up under the "pdf/" directory.
 * @param {function(?Error, string=)} [callback] - Node-style callback that
 *     receives the formatted text. When omitted, errors are logged and the
 *     result is discarded.
 */
function data_from_pdf(pdf, callback) {
        var done = typeof callback === "function" ? callback : function(err) {
                if (err) {
                        console.error(err);
                }
        };

        pdf_text("pdf/" + pdf, function(err, chunks) {
                if (err) {
                        done(err);
                        return;
                }

                var text = chunks.join("");
                // search() turns its string argument into a RegExp, so the parentheses
                // form a group and the match starts at the "p" of "p/kWh". Adding 6
                // therefore lands just past the closing ")" of "(p/kWh)".
                var marker = text.search("(p/kWh)");
                if (marker === -1) {
                        done(new Error("No p/kWh marker found in pdf/" + pdf));
                        return;
                }

                var data = text.substring(marker + 6, marker + 21);
                // The date part runs through the two characters after the first "/".
                var split = data.indexOf("/") + 3;
                var date = data.substring(0, split);
                var rate = data.substring(split);
                var json_data = "{" + "\n\tname: " + "final.pdf" + ",\n\tdate: " + date + ",\n\trate: " + rate + "\n}";

                done(null, json_data);
        });
}

2 个答案:

答案 0 :(得分:0)

原来,只要把 `options` 对象直接换成原始的 URL 字符串,就能正常工作了。奇怪。问题解决了。 :)

答案 1 :(得分:0)

试试这个:

var request = require("request");
var fs = require("fs");
var cheerio = require("cheerio");

var path = "./final.pdf";
var url = "http://www2.nationalgrid.com/UK/Industry-information/System-charges/Electricity-transmission/Assistance-for-areas-with-high-distribution-costs/";
var links = [];

// Load the listing page, collect the target of every ".txtLnk" anchor, then
// stream the first link into `path`.
request(url, function(error, response, html) {
    if (error || response.statusCode !== 200) {
        console.error("Could not load listing page:", error || ("HTTP " + response.statusCode));
        return;
    }

    var $ = cheerio.load(html);
    $(".txtLnk").each(function() {
        var href = $(this).attr("href");
        // Anchors without an href would otherwise yield "...undefined".
        if (href) {
            links.push("http://www2.nationalgrid.com" + href);
        }
    });

    // request() throws when called without a URL, so stop if nothing was found.
    if (links.length === 0) {
        console.error("No download links found on " + url);
        return;
    }

    var r = request(links[0]);
    r.on('error', function (err) {
        console.error("Download request failed:", err);
    });
    r.on('response', function (res) {
        console.log(res.headers);
        // Do not save an error page under the PDF's name.
        if (res.statusCode !== 200) {
            console.error("Download failed: HTTP " + res.statusCode);
            return;
        }

        var out = fs.createWriteStream(path);
        out.on('error', function (err) {
            console.error("Could not write " + path + ":", err);
        });
        res.pipe(out);
    });
});