我试图使用Node的http模块,从这个网站上的任意一个“下载”按钮下载PDF文档。怎样才能下载到PDF文档本身,而不是像我的代码现在这样下载到一个aspx文件?出于某种原因,我的代码下载了一个aspx文件,其内容是“错误消息 - 文件不存在,或者您无权查看此文件”,而我在网络浏览器中却可以轻松下载该文件。这是我的代码:
var pdf_text = require("pdf-text");
var request = require("request");
var http = require("http");
var fs = require("fs");
var cheerio = require("cheerio");
var urllib = require("url");
// Fallback output file name, used when the link's URL path has no final segment.
var path = "final.pdf";
// Page that lists the documents; the download buttons are anchors with class "txtLnk".
var url = "http://www2.nationalgrid.com/UK/Industry-information/System-charges/Electricity-transmission/Assistance-for-areas-with-high-distribution-costs/";
// Absolute URLs of every download link found on the page.
var links = [];

// Fetch the listing page, collect the download links, then download the second one.
request(url, function(error, response, html) {
    if (error) {
        console.error("Failed to fetch " + url + ": " + error.message);
        return;
    }
    if (response.statusCode !== 200) {
        console.error("Unexpected status " + response.statusCode + " for " + url);
        return;
    }

    var $ = cheerio.load(html);
    $(".txtLnk").each(function() {
        links.push("http://www2.nationalgrid.com" + $(this).attr("href"));
    });
    console.log(links);

    // The script downloads links[1]; bail out instead of parsing `undefined`.
    if (links.length < 2) {
        console.error("Expected at least two download links, found " + links.length);
        return;
    }

    var target = urllib.parse(links[1]);
    var options = {
        hostname: target.hostname,
        port: target.port || 80,
        // `path` is pathname + query string. Using `pathname` alone drops the
        // query, so a download handler that identifies the file through a
        // query parameter answers with its "file does not exist" page.
        path: target.path,
        headers: {
            "User-Agent": "Mozilla/5.0 (X11; Linux i686; rv:43.0) Gecko/201001101 Firefox/43.0"
        }
    };

    http.get(options, function(res) {
        if (res.statusCode !== 200) {
            console.error("Download failed with status " + res.statusCode + " for " + links[1]);
            res.resume(); // drain the response so the socket is released
            return;
        }
        // Name the file after the last path segment of the link, as before.
        // NOTE(review): that segment may not end in ".pdf" even when the
        // payload is a PDF; confirm whether `path` should be used instead.
        var fileName = target.pathname.split('/').pop() || path;
        var file = fs.createWriteStream(fileName);
        file.on('error', function(err) {
            console.error("Failed to write " + fileName + ": " + err.message);
        });
        // pipe() ends the file when the response ends.
        res.pipe(file);
    }).on('error', function(err) {
        console.error("Download request failed: " + err.message);
    });
});
/**
 * Reads the text of "pdf/<pdf>" with pdf_text and extracts the 15 characters
 * that follow the first "p/kWh)" marker, split into a date part and a rate part.
 *
 * The extraction is asynchronous, so the result is delivered through
 * `callback`; the function itself always returns undefined.
 *
 * @param {string} pdf - File name of the PDF inside the "pdf/" directory.
 * @param {function(?Error, string=)} [callback] - Called with (err) on failure
 *   or (null, text) on success. When omitted, errors are logged to the console
 *   and the result is discarded.
 * @returns {undefined}
 */
function data_from_pdf(pdf, callback) {
    var done = typeof callback === "function" ? callback : function(err) {
        if (err) {
            console.error(err);
        }
    };

    pdf_text("pdf/" + pdf, function(err, chunks) {
        if (err) {
            done(err);
            return;
        }

        var text = (chunks || []).join("");
        // The original used search("(p/kWh)"), which treats the argument as a
        // regular expression with a capture group, i.e. it finds the literal
        // "p/kWh". indexOf states that directly and yields the same index.
        var marker = text.indexOf("p/kWh");
        if (marker === -1) {
            done(new Error('Marker "p/kWh" not found in pdf/' + pdf));
            return;
        }

        // Skip the 6 characters of "p/kWh)" and take the next 15 characters.
        var data = text.substring(marker + 6, marker + 21);
        // The date part runs up to two characters past the first "/".
        // NOTE(review): assumes `data` contains a "/"; confirm against real PDFs.
        var split = data.indexOf("/") + 3;
        var date = data.substring(0, split);
        var rate = data.substring(split);

        // NOTE(review): the name is hard-coded to "final.pdf" rather than `pdf`,
        // and the output is JSON-like text, not valid JSON; kept as in the original.
        var json_data = "{" + "\n\tname: " + "final.pdf" + ",\n\tdate: " + date + ",\n\trate: " + rate + "\n}";
        done(null, json_data);
    });
}
答案 0 :(得分:0)
原来,只要不再传入"options"对象,而是直接把完整的URL传给http.get,它就能正常工作了。奇怪。问题解决了。 :)
答案 1 :(得分:0)
试试这个:
var request = require("request");
var fs = require("fs");
var cheerio = require("cheerio");
// Where the downloaded PDF is written.
var path = "./final.pdf";
// Page that lists the documents; the download buttons are anchors with class "txtLnk".
var url = "http://www2.nationalgrid.com/UK/Industry-information/System-charges/Electricity-transmission/Assistance-for-areas-with-high-distribution-costs/";
// Absolute URLs of every download link found on the page.
var links = [];

// Fetch the listing page, collect the download links, then stream the first one to `path`.
request(url, function(error, response, html) {
    if (error) {
        console.error("Failed to fetch " + url + ": " + error.message);
        return;
    }
    if (response.statusCode !== 200) {
        console.error("Unexpected status " + response.statusCode + " for " + url);
        return;
    }

    var $ = cheerio.load(html);
    $(".txtLnk").each(function() {
        links.push("http://www2.nationalgrid.com" + $(this).attr("href"));
    });

    // Nothing to download; avoid calling request() with `undefined`.
    if (links.length === 0) {
        console.error("No download links found on " + url);
        return;
    }

    // Passing the full URL keeps its query string intact.
    var r = request(links[0]);
    r.on('error', function(err) {
        console.error("Download request failed: " + err.message);
    });
    r.on('response', function(res) {
        console.log(res.headers);
        // Do not save an error page under the PDF's name.
        if (res.statusCode !== 200) {
            console.error("Download failed with status " + res.statusCode + " for " + links[0]);
            res.resume(); // drain the response so the socket is released
            return;
        }
        var out = fs.createWriteStream(path);
        out.on('error', function(err) {
            console.error("Failed to write " + path + ": " + err.message);
        });
        res.pipe(out);
    });
});