如何使用 CasperJS 从指定网站抓取表格?

时间:2016-12-21 23:44:37

标签: web-scraping casperjs

最终目标是从指定的经纪商网站获取表格形式的股票数据,并将其保存到某个文本文件中。下面是我目前通过阅读一些教程拼凑出来的代码:

// Create the CasperJS instance used by every step below.
var casper = require("casper").create();
// Page to open; judging by the query string it presumably selects one asset, a timezone offset and a date.
var url = 'https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';

// Handler that prints a farewell message and then calls exit() on whatever
// echo() returned. Must be invoked with a Casper-like object as `this`.
var terminate = function() {
    var runner = this.echo("Exiting ...");
    runner.exit();
};

// Step callback: looks up the quotes table(s) and echoes, for each match, the
// first descendant carrying the date-cell class.
// Must be invoked with a Casper-like object as `this`.
// NOTE(review): the DOM lookup happens directly in this callback rather than
// inside casper.evaluate(); presumably that is why no usable data is printed.
var processPage = function() {
    var self = this;
    // Selector obtained with "Copy selector" in Chrome DevTools on the broker site.
    var tables = document.querySelectorAll('#mCSB_3_container > table');

    self.echo(tables);
    self.echo(tables.length);

    Array.prototype.forEach.call(tables, function(table) {
        // Print the date cell found inside this match.
        self.echo(table.querySelector('.quotes-table-result__date'));
    });
};

// Open the page, wait for the table container, then hand over to processPage
// (terminate is passed as the timeout handler) and start the run.
casper
    .start(url)
    .waitForSelector('#mCSB_3_container', processPage, terminate);
casper.run();

这段代码应该检索股票价格表并打印出每个单元格。但是,我得到的只是 'undefined',这可能意味着我没有正确查询 querySelector 返回的对象。请假设我不懂任何网页编程(HTML、CSS)。

1 个答案:

答案 0 :(得分:1)

首先,问题在于 waitFor 设置得不够好:你必须等待行/单元格出现。
这个页面上得到的节点有点奇怪;如果有人有更抽象的解决方案,能比我的方案更好地处理 childNodes,我会非常感兴趣:

// Create the CasperJS instance used by every step below.
var casper = require('casper').create();
// Page to open; judging by the query string it presumably selects one asset, a timezone offset and a date.
var url = 'https://eu.iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';
// Number of table rows; assigned by the counting step and read by the printing step.
var length;

// Open the target page, then queue a step that waits until at least one
// table row inside the scroll container is present.
casper.start(url).then(function waitForRows() {
    this.waitForSelector('#mCSB_3_container table tbody tr');
});

/**
 * Reads the trimmed text of one table cell from the page loaded in casper.
 *
 * @param {number} row  Zero-based index into the `table tbody tr` matches.
 * @param {number} cell Zero-based index into that row's childNodes.
 * @returns {string} The node's innerText with surrounding whitespace removed.
 */
function getCellContent(row, cell) {
    // Return the evaluated value directly so that no undeclared (implicitly
    // global) variable is created along the way.
    return casper.evaluate(function(rowIndex, cellIndex) {
        // This function runs in the page context, so it may only use its own
        // arguments and the page's DOM.
        // NOTE(review): childNodes can also contain text/comment nodes; this
        // assumes the indexes line up with the <td> elements - confirm on the page.
        var tableRows = document.querySelectorAll('table tbody tr');
        return tableRows[rowIndex].childNodes[cellIndex].innerText.trim();
    }, row, cell);
}

// Step: count the table rows and remember the count for the printing step.
casper.then(function() {
    // Only the numeric length is sent back from the page context: values
    // returned by evaluate() have to be serializable, and a NodeList of DOM
    // elements is not.
    length = this.evaluate(function() {
        return document.querySelectorAll('table tbody tr').length;
    });
    this.echo("table length: " + length);
});

// Step: for every counted row, print its first four cells, each prefixed by
// its column label, then kick off the whole queued run.
casper.then(function() {
    // Labels in column order; the array index doubles as the cell index.
    var labels = ["Date: ", "Bid: ", "Ask: ", "Quotes: "];
    for (var rowIndex = 0; rowIndex < length; rowIndex++) {
        for (var colIndex = 0; colIndex < labels.length; colIndex++) {
            this.echo(labels[colIndex] + getCellContent(rowIndex, colIndex));
        }
    }
});

casper.run();