我正在尝试从此页面抓取一组带有薪资信息的击球手数据: https://www.swishanalytics.com/optimus/mlb/dfs-batter-projections
我正在使用node.js和node-horseman。
这是我的代码:
// Open the projections page with node-horseman and, if the reported status
// is 200, save a screenshot and print the page's HTML.
const Horseman = require('node-horseman');
const horseman = new Horseman();
const PROJECTIONS_URL = 'https://www.swishanalytics.com/optimus/mlb/dfs-batter-projections';

horseman.open(PROJECTIONS_URL);

const pageStatus = horseman.status();
if (pageStatus === 200) {
  console.log('[+] Successful page opening');
  horseman.screenshot('image.png');
  const markup = horseman.html();
  console.log(markup);
}

// Close the Horseman instance whether or not the page loaded.
horseman.close();
问题是 horseman.html() 返回的内容中仍然包含大量 JavaScript,无法用 cheerio 之类的工具提取。如何以编程方式执行这些 JavaScript?
例如,如果我查看同一链接的页面源代码,会看到包含击球手数据的区域以如下内容开头:
function Model(){ this.batterArray =
[{"team_short":"rockies","mlbam_id":"571448","player_name":"Nolan Arenado",
显然这仍然是 JavaScript……我猜它必须在某个时刻被执行并转换为 HTML,才能由浏览器渲染?
答案 0 :(得分:2)
我刚测试了它,似乎有效:
// Open the projections page and, if the reported status is 200, take a
// screenshot and read the batter list straight out of the page's own script.
const Horseman = require('node-horseman');
const horseman = new Horseman();
const PROJECTIONS_URL = 'https://www.swishanalytics.com/optimus/mlb/dfs-batter-projections';

// Executed inside the page context, where the site's Model constructor lives.
function readBatterArray() {
  return (new Model()).batterArray;
}

horseman.open(PROJECTIONS_URL);

const pageStatus = horseman.status();
if (pageStatus === 200) {
  console.log('[+] Successful page opening');
  horseman.screenshot('image.png');
  const batters = horseman.evaluate(readBatterArray);
  console.log(batters);
}

// Close the Horseman instance whether or not the page loaded.
horseman.close();
这样您就能得到一个可在代码中使用的击球手数组。您可以将其写入文件,或用它生成表格。
答案 1 :(得分:0)
下面是应有的写法。
// Promise-style node-horseman script: open the projections page, and when it
// loads with status 200 take a screenshot and print the batter array that the
// page's own script defines.
//
// Every Horseman action here is asynchronous, so results must flow through
// the promise chain: an action's value is only available in the next .then().
const Horseman = require('node-horseman');
const horseman = new Horseman();

horseman
  .open('https://www.swishanalytics.com/optimus/mlb/dfs-batter-projections')
  .status()
  .then((status) => {
    if (status !== 200) {
      console.log('no batters');
      return null;
    }
    console.log('[+] Successful page opening');
    // Return the chained actions so the next .then() waits for the
    // screenshot to finish and receives the value produced by evaluate().
    return horseman
      .screenshot('image.png')
      .evaluate(function () {
        // Runs inside the page, where the site's Model constructor is defined.
        return (new Model()).batterArray;
      });
  })
  .then((batters) => {
    // null means the page did not load; that case was already reported above.
    if (batters !== null) {
      console.log(batters);
    }
  })
  .catch((err) => {
    // Surface navigation/evaluation failures instead of leaving an
    // unhandled rejection.
    console.error(err);
  })
  // Always shut the browser down, even when a step above failed.
  .finally(() => horseman.close());