Question

我正在尝试使用PhantomJS来获取动态生成的网页的html源（实际上标题就够了），例如：

我已经尝试过在互联网上找到的几个脚本，但没有一个会检索到正确的内容（标题为空）。任何帮助或提示将不胜感激。

我已尝试过这些方面（对输出到控制台的一些修改）：

var page = require('webpage').create();
page.open('http://google.com', function () {
    console.log(page.content);
    phantom.exit();
});

也是这样：

var page = require('webpage').create();  
page.open('http://www.google.com', function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        var p = page.evaluate(function () {
            return document.getElementsByTagName('html')[0].innerHTML
        });
        console.log(p);
    }
    phantom.exit();
});

也是这样：

var page = require('webpage').create();
var fs = require('fs');
page.open('https://www.somesite.com/page.aspx', function () {
    page.evaluate(function(){

    });

    page.render('export.png');
    fs.write('1.html', page.content, 'w');
    phantom.exit();
});

也是这样：

page.open(address, function (status) {
    if (status !== 'success') {
        console.log('Unable to load the address!');
        phantom.exit();
    } else {
        window.setTimeout(function () {
            page.render(output);
            phantom.exit();
        }, 1000); // Change timeout as required to allow sufficient time 
    }
});

PhantomJS不会加载网址的动态内容

0 个答案: