我有如下 CasperJS 脚本:
var casper = require('casper').create();
var fs=require('fs');
// Open the season's match-list page and schedule three 2000 ms waits:
// save the page, click the "previous" link, save the page again.
casper.start('http://int.soccerway.com/national/switzerland/super-league/20152016/regular-season/r31601/matches/?ICID=PL_3N_02', function() {
// After the first wait, write the current page HTML to swiss.html.
this.wait(2000, function() {
fs.write("swiss.html", this.getHTML() );
});
// After the second wait, try to dispatch a synthetic click on the "previous" link.
// NOTE(review): this callback runs in the CasperJS script, not inside the loaded page,
// so page elements cannot be reached through `document` here. This is the defect
// the question is asking about; the code has to be evaluated in the page context.
this.wait(2000, function() {
var evObj = document.createEvent('Events');
evObj.initEvent('click', true, false);
document.getElementById('page_competition_1_block_competition_matches_6_previous').dispatchEvent(evObj);
});
// After the third wait, write the (expected) previous page's HTML to swiss2.html.
this.wait(2000, function() {
fs.write("swiss2.html", this.getHTML() );
});
});
// Start executing the steps scheduled above.
casper.run();
我想在代码中打开链接,然后点击"上一页"并获取页面的 HTML(我希望获得包含整个赛季每场比赛结果的 HTML 文档)。
我做错了什么?(我是初学者)
谢谢..
答案 0 :(得分:2)
脚本几乎是正确的。唯一的错误出在与页面交互的部分(单击"上一页"按钮)。
您无法从脚本内部直接访问页面元素,而必须在已打开网页的上下文中执行("注入")该代码。CasperJS 提供了 casper.evaluate() 函数来完成这件事。
from eve import Eve
def on_fetched_resource(resource, response):
    """Remove the '_links' and '_meta' entries from ``response`` in place.

    Args:
        resource: value passed by the event hook; not used here.
        response: mutable mapping holding the fetched payload.

    Returns:
        None. ``response`` is modified in place.
    """
    # pop() with a default tolerates payloads that lack these keys,
    # where a bare ``del`` would raise KeyError.
    response.pop('_links', None)
    response.pop('_meta', None)
    # would result in an empty JSON document
    # del(response['_items'])
# Create the Eve application.
app = Eve()
# Subscribe the callback so it is invoked with (resource, response)
# on the application's on_fetched_resource event.
app.on_fetched_resource += on_fetched_resource
# Only start the server when this file is executed directly, not when imported.
if __name__ == '__main__':
    app.run()
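或者,您也可以不使用 casper.evaluate,而是简单地编写 this.click('#page_competition_1_block_competition_matches_6_previous');(使用 casper.evaluate 的完整脚本如下:)
var casper = require('casper').create();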
var fs=require('fs');
// Open the season's match-list page, save it, click the "previous" link
// inside the page, and save the resulting page as well.
casper.start('http://int.soccerway.com/national/switzerland/super-league/20152016/regular-season/r31601/matches/?ICID=PL_3N_02', function() {
    // After 2000 ms, write the current page HTML to swiss.html.
    this.wait(2000, function() {
        fs.write("swiss.html", this.getHTML() );
    });
    this.wait(2000, function() {
        // Code inside of the evaluated function will run
        // as if it was placed inside the target page, so `document`
        // is the page's DOM rather than the CasperJS script scope.
        var clicked = casper.evaluate(function() {
            var prev_link = document.getElementById('page_competition_1_block_competition_matches_6_previous');
            // Report a missing link to the caller instead of throwing
            // a TypeError inside the page context.
            if (!prev_link) {
                return false;
            }
            var evObj = document.createEvent('Events');
            evObj.initEvent('click', true, false);
            prev_link.dispatchEvent(evObj);
            return true;
        });
        // Without this warning a missing link would go unnoticed and
        // swiss2.html would silently contain the same page as swiss.html.
        if (!clicked) {
            casper.echo('"previous" link not found; swiss2.html will repeat the first page', 'WARNING');
        }
    });
    // After another 2000 ms, write the page HTML to swiss2.html.
    this.wait(2000, function() {
        fs.write("swiss2.html", this.getHTML() );
    });
});
// Start executing the steps scheduled above.
casper.run();
正如Artjom B.建议的那样。