我正在尝试使用CasperJS下载文件。如果使用浏览器,则在用户单击按钮时开始下载,响应标头如下所示:
我试过这两种方法没有运气:
1)https://stackoverflow.com/a/26334034
使用这种方法,if语句中的代码块永远不会被执行。如果删除条件,则会保存一堆资源,例如css文件等。所以事件监听器正在工作,但由于某些原因,当我使用CasperJs单击函数点击应该开始下载的按钮时,不会触发。
2)https://stackoverflow.com/a/30122021/692695
File.csv已保存,但它是网站的源代码,而不是我点击网站上的按钮时获得的csv文件。
我的所有代码:
'use strict';
var utils = require('utils');
var casper = require('casper').create({
//verbose: true,
//logLevel: "debug",
clientScripts: ["node_modules/jquery/dist/jquery.min.js"]
});
function writeHtml(filename) {
var fs = require('fs');
var content = casper.getHTML();
fs.write(filename, content, 'w');
}
function getUrl() {
var url;
url = $('.tableofcontent_link:contains("Väestö työmarkkina-aseman, sukupuolen ja iän mukaan")').parent().attr('href');
return url;
}
casper.selectOptionByValue = function(selector, valueToMatch){
this.evaluate(function(selector, valueToMatch){
var select = document.querySelector(selector),
found = false;
Array.prototype.forEach.call(select.children, function(opt, i){
if (!found && opt.value.indexOf(valueToMatch) !== -1) {
select.selectedIndex = i;
found = true;
}
});
// dispatch change event in case there is some kind of validation
var evt = document.createEvent("UIEvents"); // or "HTMLEvents"
evt.initUIEvent("change", true, true);
select.dispatchEvent(evt);
}, selector, valueToMatch);
};
var link;
var url = 'http://pxnet2.stat.fi/PXWeb/pxweb/fi/StatFin/StatFin__tym__tyti/?table';
casper.start(url);
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');
casper.then(function () {
this.echo("Now at: " + this.getCurrentUrl());
link = 'http://pxnet2.stat.fi' + casper.evaluate(getUrl);
});
casper.then(function () {
this.open(link);
});
casper.then(function() {
this.echo("Now at: " + this.getCurrentUrl());
// Select all data for each item
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl01_VariableValueSelect_VariableValueSelect_SelectAllButton');
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl02_VariableValueSelect_VariableValueSelect_SelectAllButton');
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl03_VariableValueSelect_VariableValueSelect_SelectAllButton');
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl04_VariableValueSelect_VariableValueSelect_SelectAllButton');
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl05_VariableValueSelect_VariableValueSelect_SelectAllButton');
});
casper.then(function() {
// casper.selectOptionByValue('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_OutputFormats_OutputFormats_OutputFormatDropDownList',
// 'FileTypeExcelX');
// Select the format of the file from the select option list at the bottom
casper.selectOptionByValue('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_OutputFormats_OutputFormats_OutputFormatDropDownList',
'FileTypeCsvWithHeadingAndSemiColon');
});
casper.then(function () {
// just for debugging
writeHtml('page1.html');
});
casper.then(function() {
//casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_ButtonViewTable');
});
casper.then(function() {
// FIRST ATTEMPT TO LOAD THE DATA TO a file called file.csv
var formData = casper.evaluate(function(){
return $('form#aspnetForm').serialize();
});
//this.echo("Params: " + formData);
var targetFile = 'file.csv';
casper.download(link, targetFile, 'POST', formData);
});
casper.then(function () {
// just for debugging
writeHtml('page2.html');
});
// SECCOND ATTEMPT TO LOAD THE DATA TO a file called stats.csv
casper.on('resource.received', function (resource) {
if ((resource.url.indexOf('tyti_001') !== -1) ) {
this.echo(resource.url);
var file;
file = "stats.csv";
try {
this.echo("Attempting to download file " + file);
var fs = require('fs');
casper.download(resource.url, fs.workingDirectory + '/' + file);
} catch (e) {
this.echo(e);
}
}
});
casper.run(function () {
this.echo('End').exit();
});
我的package.json:
{
"scripts": {
"test": "dotest"
},
"pre-commit": ["test"],
"dependencies": {
"jquery": "^3.3.1"
},
"devDependencies": {
"pre-commit": "^1.2.2"
}
}
代码说明: 首先访问此页面:http://pxnet2.stat.fi/PXWeb/pxweb/fi/StatFin/StatFin__tym__tyti/statfin_tyti_pxt_001.px/?rxid=bd4d5dc1-358d-407e-ae47-13266b79bfd0 在那里,动态选择一个指定的链接并移动到那里。 单击V-shapen图标(查看附带的屏幕截图)选择所有数据,然后选择文件格式。
答案 0 :(得分:0)
除phantomjs 2.0.0之外,所有版本我都曾遇到此问题。我还尝试了您一年前从SO共享的解决方案,但效果不佳。 我将假设您使用的不是2.0.0的phantomjs版本。 这是下载链接 https://bitbucket.org/ariya/phantomjs/downloads/
有了它,您将可以访问onFileDownload方法,您可以像下面这样重写和使用
casper.page.onFileDownload = function(status){
console.log('onFileDownload(' + status + ')');
return "newfile.csv";
};
onFileDownload将在由于单击按钮(ajax)或顺序的获取/发布请求而下载文件时被调用。 您所要做的就是触发按钮/链接的点击,这将开始下载。
注意:我的解决方案是假设其他所有内容(网站未阻止phantomjs,并且您的请求标头/ cookie符合预期)