PhantomJs无法从源渲染特定页面

时间:2017-02-12 05:22:45

标签: phantomjs

我一直能够成功地先用其他产品获取页面的原始html,然后把这段原始html交给phantomjs来渲染整个页面,包括运行其中所有的javascript。但我最近遇到了一个javascript没有被渲染的页面。

这是我运行它的方式......

# Run the script with PhantomJS; stdout and stderr are both redirected into OUTPUT.txt.
phantomjs myscript.js > OUTPUT.txt 2>&1

这是演示问题的myscript.js文件......

// Demonstrates the problem: hands previously fetched HTML to PhantomJS via
// page.setContent() and, after a fixed delay, reports whether the text
// 'Sign In' appears in the resulting page content.
// NOTE: `var` is kept on purpose; PhantomJS runs an ES5-era JavaScript engine.
var page = require('webpage').create();
var system = require('system');
var address = 'http://cloud.firebrandtech.com/#!/login';
var rawHtml = '<!DOCTYPE html>\
<html>\
<head>\
    <meta charset="utf-8">\
<meta http-equiv="X-UA-Compatible" content="IE=edge">\
<meta name="viewport" content="width=device-width, initial-scale=1.0">\
<meta name="description" content="Web Portal for managing Cloud Products, Assets, and Distributions">\
<meta name="author" content="Firebrand Technologies">\
<title>Firebrand Cloud</title>\
<link rel="stylesheet" href="/widgets/css/widgets.css">\
<link rel="stylesheet" href="/css/portal.css">\
</head>\
<body ng-app="portal" fc-app="cloud" fc-direct="true" class="fc">\
    <div>\
        <div data-ng-if="user.isLoaded" data-ng-controller="PortalCtrl">\
            <div data-ng-include="getView()"></div>\
            <div class="container">\
                <div data-ui-view></div>\
            </div>\
        </div>\
    </div>\
    <script src="/widgets/js/widgets.js"></script>\
<script src="/js/vendor.js"></script>\
<script src="/js/portal.js"></script>\
</body>\
</html>';

page.settings.resourceTimeout = 5000;
page.settings.loadImages = false;
// Load the raw HTML into the page, passing `address` as the page URL.
page.setContent(rawHtml, address);
// Inspect the page content after 5 seconds, then exit PhantomJS.
window.setTimeout(function () {
    if (page.content.indexOf('Sign In') > -1) {
        console.log('YAY!!! Javascript Rendered!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
    } else {
        console.log('BOO!!! Javascript NOT Rendered!!!!!!!!!!!!!!!!!!!!!!!!!!');
    }

    phantom.exit();
}, 5000);

似乎这个页面需要一些auth / cors才能工作。如果让phantomjs发出实际请求(使用page.open)来获取源代码,我可以让它正常工作,如下例所示。但是,这种方案不适用于我的场景:phantomjs必须使用上面示例中已有的源代码(正如我提到的,这种方式在其他所有站点上都运行良好)。

// Variant that lets PhantomJS request the page itself with page.open() and,
// 5 seconds after the open callback fires, reports whether the text
// 'Sign In' appears in the page content.
// NOTE: `var` is kept on purpose; PhantomJS runs an ES5-era JavaScript engine.
var page = require('webpage').create();
var system = require('system');
var address = 'http://cloud.firebrandtech.com/#!/login';

page.open(address, function (status) {
    setTimeout(function () {
        if (page.content.indexOf('Sign In') > -1) {
            console.log('YAY!!! Javascript Rendered!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
        } else {
            console.log('BOO!!! Javascript NOT Rendered!!!!!!!!!!!!!!!!!!!!!!!!!!');
        }

        phantom.exit();
    }, 5000);
});

我已经尝试过使用以下标志,但它们似乎没有效果......

# Same kind of invocation, with --web-security=false and --ignore-ssl-errors=true passed to PhantomJS.
phantomjs --web-security=false --ignore-ssl-errors=true thefilebelow.js > OUTPUT.txt 2>&1

1 个答案:

答案 0 :(得分:0)

终于解决了这个问题......

由于我是用其他产品(不是phantomjs)来检索页面源代码的,我需要保留该请求返回的cookie,然后使用addCookie把这些cookie传给phantomjs,如下所示......

// Working version: registers the cookies returned by the initial (non-PhantomJS)
// request with phantom.addCookie() before handing the raw HTML to
// page.setContent(), then reports after a fixed delay whether the text
// 'Sign In' appears in the page content.
// NOTE: `var` is kept on purpose; PhantomJS runs an ES5-era JavaScript engine.
var page = require('webpage').create();
var system = require('system');
var address = 'http://cloud.firebrandtech.com/#!/login';
var rawHtml = 'same raw html as above...';

//THE COOKIE SETUP BELOW IS WHAT CHANGED
// Placeholder values: replace with the cookies captured from the initial request.
var cookiesFromInitialRequest = [{name: 'aaa', value: 'bbb', domain: 'ccc'} /* , ...remaining cookies */];
for (var i = 0; i < cookiesFromInitialRequest.length; i++) {
    phantom.addCookie(cookiesFromInitialRequest[i]);
}

page.settings.resourceTimeout = 5000;
page.settings.loadImages = false;
// Load the raw HTML into the page, passing `address` as the page URL.
page.setContent(rawHtml, address);
// Inspect the page content after 5 seconds, then exit PhantomJS.
window.setTimeout(function () {
    if (page.content.indexOf('Sign In') > -1) {
        console.log('YAY!!! Javascript Rendered!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
    } else {
        console.log('BOO!!! Javascript NOT Rendered!!!!!!!!!!!!!!!!!!!!!!!!!!');
    }

    phantom.exit();
}, 5000);