PhantomJS和CasperJS点击链接并获取HTML

时间:2015-11-19 16:00:04

标签: javascript ajax phantomjs casperjs scraper

我在使用下面这个CasperJS脚本时遇到了问题:

// CasperJS script from the question: open the match-list page, save its HTML
// to swiss.html, try to click the "previous" link, then save the HTML again
// to swiss2.html.
var casper = require('casper').create();
var fs=require('fs');

casper.start('http://int.soccerway.com/national/switzerland/super-league/20152016/regular-season/r31601/matches/?ICID=PL_3N_02', function() {
  // First snapshot: after the wait, dump the current page HTML to swiss.html.
  this.wait(2000, function() {
    fs.write("swiss.html", this.getHTML() );

  });
  // NOTE(review): this callback runs in the CasperJS script itself, not inside
  // the opened web page, so the page's elements cannot be reached through
  // `document` here. Per the answer in this document, this DOM code has to be
  // evaluated in the page context (casper.evaluate) for the click to work.
  this.wait(2000, function() {
    var evObj = document.createEvent('Events');
    evObj.initEvent('click', true, false);
    document.getElementById('page_competition_1_block_competition_matches_6_previous').dispatchEvent(evObj);
  });
  // Second snapshot: after another wait, dump the page HTML to swiss2.html.
  this.wait(2000, function() {
    fs.write("swiss2.html", this.getHTML() );   
  });
});

// Start executing the queued steps.
casper.run();

我想在代码中打开该链接,然后点击"上一页"(Previous)并获取页面的HTML(我希望得到包含整个赛季每场比赛结果的HTML文档)。

我做错了什么?(我是新手)

谢谢..

1 个答案:

答案 0 :(得分:2)

脚本几乎是正确的。唯一的错误出在与页面交互的部分(点击"上一页"按钮)。

您无法在脚本中直接访问页面元素,必须在已打开网页的上下文中执行("注入")这段代码。在CasperJS中,casper.evaluate()函数可以做到这一点。

from eve import Eve

def on_fetched_resource(resource, response):
    """Strip the ``_links`` and ``_meta`` entries from a fetched response.

    The response mapping is modified in place; nothing is returned.

    Args:
        resource: Passed in by the event hook; not used by this handler.
        response: Mutable mapping holding the response payload.
    """
    # pop() with a default tolerates payloads that lack one of these keys,
    # where a plain ``del`` would raise KeyError.
    response.pop('_links', None)
    response.pop('_meta', None)

    # Removing '_items' as well would result in an empty JSON document:
    # response.pop('_items', None)

# Create the Eve application and subscribe on_fetched_resource to its
# on_fetched_resource event, so the handler can edit each response.
app = Eve()
app.on_fetched_resource += on_fetched_resource

# Start the application only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    app.run()

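或者,您也可以不使用casper.evaluate,而是直接调用CasperJS内置的 this.click('#page_competition_1_block_competition_matches_6_previous') 来点击该链接。下面是使用casper.evaluate的完整脚本:
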
// Corrected CasperJS script: open the match-list page, save its HTML to
// swiss.html, click the "previous" link from inside the page context, then
// save the updated HTML to swiss2.html.
// Kept in ES5 syntax because the script runs on PhantomJS.
var casper = require('casper').create();
var fs = require('fs');

// id of the "previous" link that the script clicks.
var PREVIOUS_LINK_ID = 'page_competition_1_block_competition_matches_6_previous';

casper.start('http://int.soccerway.com/national/switzerland/super-league/20152016/regular-season/r31601/matches/?ICID=PL_3N_02', function() {
  // First snapshot: the page as initially loaded.
  this.wait(2000, function() {
    fs.write("swiss.html", this.getHTML());
  });

  this.wait(2000, function() {
    // Code inside the function passed to evaluate() runs as if it was placed
    // inside the target page, so `document` there is the page's DOM. Values
    // from this script must be handed over as arguments.
    var clicked = this.evaluate(function(linkId) {
      var prevLink = document.getElementById(linkId);
      if (!prevLink) {
        // Report the missing element instead of failing silently in the page.
        return false;
      }
      var evObj = document.createEvent('Events');
      evObj.initEvent('click', true, false);
      prevLink.dispatchEvent(evObj);
      return true;
    }, PREVIOUS_LINK_ID);

    if (!clicked) {
      this.echo('Previous link #' + PREVIOUS_LINK_ID + ' was not found; swiss2.html will not contain the previous matches.');
    }
  });

  // Second snapshot: taken after a further wait so the click can take effect.
  this.wait(2000, function() {
    fs.write("swiss2.html", this.getHTML());
  });
});

// Start executing the queued steps.
casper.run();

正如Artjom B.建议的那样。