PhantomJS网站抓取结果与在浏览器中加载不同

时间:2016-09-26 22:18:47

标签: javascript web-scraping phantomjs

如果您在浏览器中加载以下网站,民调结果会按照URL中指定的方式显示为"5天滚动"选项。但是,如果使用PhantomJS加载同一个地址,得到的却是"整体"选项。我想不出原因,有什么想法吗?

http://polling.reuters.com/#poll/TM651Y15_DS_13/filters/LIKELY:1/dates/20160918-20160922/type/day

// Work queues consumed front-to-back by next_page(). The two arrays are
// parallel: the i-th entry of `paths` names the output file for the i-th
// entry of `urls`, so they must be kept the same length and order.
var paths = [
    "tvc"
];

var urls = [
    "http://polling.reuters.com/#poll/TM651Y15_DS_13/filters/LIKELY:1/dates/20160918-20160922/type/day"
];

function handle_page(url, path){

    console.log(url);

    var webPage = require('webpage');
    var page = webPage.create();
    var fs = require('fs');

    page.open(url, function(){
        var content = page.content;
        fs.write(path, content,'w');
        next_page();
    });
}

/**
 * Processes the next queued page, or exits PhantomJS once the queue is empty.
 *
 * Removes the first entry from `urls` and the first entry from `paths`,
 * builds the output file name "reuters_<path>.html" and passes both to
 * handle_page().
 */
function next_page(){

    if(urls.length === 0){
        console.log("exiting");
        phantom.exit(0);
        // phantom.exit() does not stop the currently running function, so
        // return explicitly; otherwise the code below would shift from the
        // empty queues and call handle_page() with an undefined URL.
        return;
    }

    var url = urls.shift();
    var path = "reuters_" + paths.shift() + ".html";

    handle_page(url, path);
}

// Entry point: start working through the queued URLs.
next_page();

0 个答案:

没有答案