无法使用CasperJS下载文件

时间:2018-06-19 08:38:33

标签: casperjs

我正在尝试使用CasperJS下载文件。在浏览器中,用户单击按钮后下载即开始,响应标头如下所示:(此处原为响应标头的截图)

我尝试了以下两种方法,但都没有成功:

1)https://stackoverflow.com/a/26334034

使用这种方法时,if语句中的代码块永远不会被执行。如果去掉该条件,则会保存一堆资源(例如css文件等)。这说明事件监听器本身是工作的,但出于某种原因,当我用CasperJS的click函数点击本应开始下载的按钮时,监听器并没有因为该下载而被触发。

2)https://stackoverflow.com/a/30122021/692695

file.csv已保存,但其内容是网页的源代码,而不是我在网站上点击按钮时得到的csv文件。

我的所有代码:

'use strict';

var utils = require('utils');

// Casper instance used by every navigation step and helper in this script.
var casper = require('casper').create({
    // Uncomment the next two options to turn on verbose debug logging.
    //verbose: true,
    //logLevel: "debug",
    // jQuery is listed as a client script because the page-context callbacks
    // in this file (`getUrl` and the form serialization) call `$`.
    clientScripts: ["node_modules/jquery/dist/jquery.min.js"]
});

function writeHtml(filename) {
    var fs = require('fs');
    var content = casper.getHTML();
    fs.write(filename, content, 'w');
}

/**
 * Page-context helper (handed to `casper.evaluate`, so it must not reference
 * anything from this script's scope). Relies on jQuery being present in the page.
 *
 * Looks up the `.tableofcontent_link` element containing the table title and
 * reads the `href` attribute of its parent element.
 *
 * @returns {string|undefined} The parent's `href` value as returned by
 *     jQuery's `.attr('href')`.
 */
function getUrl() {
  return $('.tableofcontent_link:contains("Väestö työmarkkina-aseman, sukupuolen ja iän mukaan")')
    .parent()
    .attr('href');
}

/**
 * Selects the first <option> of a <select> whose `value` contains
 * `valueToMatch`, then fires a bubbling, cancelable "change" event on the
 * <select> so that page-side handlers run.
 *
 * The work happens in the page context via `evaluate`, so the inner function
 * must not reference anything from this script's scope.
 *
 * @param {string} selector - CSS selector of the <select> element.
 * @param {string} valueToMatch - Substring searched for in each option's value.
 * @returns {boolean} `true` when a matching option was selected; `false` when
 *     the element does not exist or no option matched.
 */
casper.selectOptionByValue = function(selector, valueToMatch){
    return this.evaluate(function(selector, valueToMatch){
        var select = document.querySelector(selector);
        if (!select) {
            // Nothing to select on; avoid a TypeError inside the page.
            return false;
        }
        // `selectedIndex` is an index into `select.options`, not into
        // `select.children`: with <optgroup> children the two differ, and an
        // <optgroup> has no `value` to search in.
        var options = select.options;
        var found = false;
        for (var i = 0; i < options.length; i++) {
            if (options[i].value.indexOf(valueToMatch) !== -1) {
                select.selectedIndex = i;
                found = true;
                break;
            }
        }
        // dispatch change event in case there is some kind of validation
        var evt = document.createEvent("UIEvents"); // or "HTMLEvents"
        evt.initUIEvent("change", true, true);
        select.dispatchEvent(evt);
        return found;
    }, selector, valueToMatch);
};

// Absolute URL of the table page; assigned by the first step once the
// table-of-contents page has been scraped.
var link;
var url = 'http://pxnet2.stat.fi/PXWeb/pxweb/fi/StatFin/StatFin__tym__tyti/?table';

// Common prefix of the ASP.NET-generated element ids used on the table page.
var CONTROL_ID_PREFIX = '#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_';
// Number of "select all" buttons to press (repeater items ctl01..ctl05).
var SELECT_ALL_BUTTON_COUNT = 5;

casper.start(url);
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');

// Step 1: find the link to the wanted table on the table-of-contents page.
casper.then(function () {
    this.echo("Now at: " + this.getCurrentUrl());
    var href = this.evaluate(getUrl);
    if (!href) {
        // Without this guard the next step would try to open
        // "http://pxnet2.stat.fiundefined" (or "...null").
        this.die('Table link not found on ' + this.getCurrentUrl(), 1);
        return;
    }
    link = 'http://pxnet2.stat.fi' + href;
});

// Step 2: navigate to the table page.
casper.then(function () {
    this.open(link);
});

// Step 3: select all data for each item.
casper.then(function () {
    this.echo("Now at: " + this.getCurrentUrl());
    // The repeater index is a single digit here ("ctl01".."ctl05").
    for (var i = 1; i <= SELECT_ALL_BUTTON_COUNT; i++) {
        this.click(CONTROL_ID_PREFIX + 'VariableSelectorValueSelectRepeater_ctl0' + i +
            '_VariableValueSelect_VariableValueSelect_SelectAllButton');
    }
});

// Step 4: select the format of the file from the select option list at the bottom.
casper.then(function () {
    // Alternative format value: 'FileTypeExcelX'
    this.selectOptionByValue(
        CONTROL_ID_PREFIX + 'OutputFormats_OutputFormats_OutputFormatDropDownList',
        'FileTypeCsvWithHeadingAndSemiColon'
    );
});

casper.then(function () {
    // just for debugging
    writeHtml('page1.html');
});

// No-op step: the click on the "view table" button was left disabled by the author.
casper.then(function () {
    //casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_ButtonViewTable');
});

// FIRST ATTEMPT TO LOAD THE DATA TO a file called file.csv:
// POST the serialized form back to the table page and save the response.
casper.then(function () {
    var formData = this.evaluate(function () {
        return $('form#aspnetForm').serialize();
    });
    // NOTE(review): jQuery's .serialize() does not include submit-button
    // fields, so the server may not be told which button was "pressed".
    // That could be why the page HTML comes back instead of the CSV - confirm.
    //this.echo("Params: " + formData);
    var targetFile = 'file.csv';
    this.download(link, targetFile, 'POST', formData);
});

casper.then(function () {
    // just for debugging
    writeHtml('page2.html');
});

// SECOND ATTEMPT TO LOAD THE DATA TO a file called stats.csv:
// watch every received resource and re-download the one whose URL matches.
casper.on('resource.received', function (resource) {
  // The event is emitted for each stage of a response ("start", then "end");
  // acting only on "end" avoids downloading the same resource more than once.
  if (resource.stage !== 'end') {
    return;
  }
  // NOTE(review): the table URL quoted in the question contains
  // "tyti_pxt_001", which does NOT contain "tyti_001" - confirm that this
  // substring matches the URL of the actual download response.
  if (resource.url.indexOf('tyti_001') === -1) {
    return;
  }
  this.echo(resource.url);
  var file = "stats.csv";
  try {
    this.echo("Attempting to download file " + file);
    var fs = require('fs');
    // NOTE(review): download() is called without a method or data, so it
    // presumably re-requests the URL with a plain GET; if the file is only
    // produced in response to the form POST this will not return the CSV.
    this.download(resource.url, fs.workingDirectory + '/' + file);
  } catch (e) {
    this.echo(e);
  }
});

// Execute the queued steps; once they have all finished, print a marker and quit.
casper.run(function () {
    var self = this.echo('End');
    self.exit();
});

我的package.json:

{
    "scripts": {
      "test": "dotest"
    },
    "pre-commit": ["test"],
    "dependencies": {
        "jquery": "^3.3.1"
    },
    "devDependencies": {
        "pre-commit": "^1.2.2"
    }
}

代码说明: 首先访问此页面:http://pxnet2.stat.fi/PXWeb/pxweb/fi/StatFin/StatFin__tym__tyti/statfin_tyti_pxt_001.px/?rxid=bd4d5dc1-358d-407e-ae47-13266b79bfd0 在那里,动态选择一个指定的链接并跳转过去。 单击V形图标(参见附带的屏幕截图)选择所有数据,然后选择文件格式。

Screenshot with annotations

1 个答案:

答案 0 :(得分:0)

除phantomjs 2.0.0之外,我在所有版本上都遇到过这个问题。大约一年前,我也尝试过您提到的那些SO上的解决方案,但效果不佳。 我假设您使用的phantomjs版本不是2.0.0。 这是下载链接 https://bitbucket.org/ariya/phantomjs/downloads/

有了它,您将可以访问onFileDownload方法,您可以像下面这样重写和使用

// Hook invoked when a download starts: logs the argument it receives and
// returns "newfile.csv" (presumably the name to save the file under; confirm
// against the PhantomJS build in use).
// NOTE(review): `casper.page` presumably exists only after `casper.start()`
// has been called - confirm before installing this hook earlier.
casper.page.onFileDownload = function(status){
    var message = 'onFileDownload(' + status + ')';
    console.log(message);
    return "newfile.csv";
};

当单击按钮(ajax)或后续的GET/POST请求导致文件下载时,onFileDownload会被调用。 您只需触发按钮/链接的点击,下载就会开始。

注意:我的解决方案假设其他一切都正常(网站没有屏蔽phantomjs,并且您的请求标头/cookie符合预期)。