我正在尝试使用 PhantomJS 抓取 https://app.kabuto.com/auth 的页面内容(我的最终目标是在 Python 中使用 Selenium 的 PhantomJS WebDriver)。下面的 PhantomJS 脚本可以正常运行,没有报错,但仍然无法获取页面上的文本('电子邮件'、'密码'等)。这些文本是表单元素和标题,我在页面的 'Inspect Element'(检查元素)中可以看到它们,而 'View Page Source'(查看页面源代码)中只包含 JavaScript。我是 PhantomJS 新手,也缺乏 JavaScript 知识,所以很可能犯了一些错误;非常感谢任何帮助!
try {
// Load the modules this script relies on.
var system = require('system');
var page = require('webpage').create();

// Address of the page whose rendered content we want to dump.
var url = 'https://app.kabuto.com/auth';

// Show the stock user agent first, then replace it with a custom one.
console.log('The default user agent is ' + page.settings.userAgent);
page.settings.userAgent = 'SpecialAgent';
/**
 * Logs every argument it receives, one per line, as
 * " arguments[<index>] = <JSON>", followed by one empty line.
 *
 * Accepts any number of arguments of any type; values are serialized
 * with JSON.stringify before being printed.
 */
function printArgs() {
    // Copy the array-like `arguments` object into a real array so it can be iterated.
    var received = Array.prototype.slice.call(arguments);

    received.forEach(function (value, position) {
        console.log(" arguments[" + position + "] = " + JSON.stringify(value));
    });

    // Blank separator line after each dump.
    console.log("");
}
// Builds a handler for a page lifecycle event: it logs the event label and
// then dumps every argument the callback received via printArgs.
var makeLifecycleLogger = function (label) {
    return function () {
        console.log(label);
        printArgs.apply(this, arguments);
    };
};

// Lifecycle tracing.
page.onInitialized = makeLifecycleLogger("page.onInitialized");
page.onLoadStarted = makeLifecycleLogger("page.onLoadStarted");
page.onLoadFinished = makeLifecycleLogger("page.onLoadFinished");

// Keep the details of the most recent resource failure on the page object;
// each new failure overwrites the previous one.
page.onResourceError = function (resourceError) {
    page.reason_url = resourceError.url;
    page.reason = resourceError.errorString;
};

// Network tracing: pretty-print each request and response (4-space indent).
page.onResourceRequested = function (request) {
    var requestDump = JSON.stringify(request, null, 4);
    console.log('Request ' + requestDump);
};
page.onResourceReceived = function (response) {
    var responseDump = JSON.stringify(response, null, 4);
    console.log('Receive ' + responseDump);
};

// Report errors delivered to the page's onError callback.
page.onError = function (msg) {
    console.log('**some js error**' + msg);
};

// Forward messages delivered to the page's onConsoleMessage callback to this
// script's output; only the message text is printed.
page.onConsoleMessage = function (msg, lineNum, sourceId) {
    console.log('CONSOLE: ' + msg);
};
// Open the target URL and dump the page's text and markup once the page's own
// scripts have had a chance to render something.
//
// Changes from the previous version:
//   * On a failed load the script now reports the last recorded resource error
//     (if any) and exits with a non-zero code instead of hanging forever.
//   * Instead of dumping after a single fixed 300 ms delay, the script polls
//     until the body contains visible text, giving up after MAX_WAIT_MS. The
//     dump is still produced on timeout so the markup can be inspected.
page.open(url, function (status) {
    var POLL_INTERVAL_MS = 300;   // delay between readiness checks
    var MAX_WAIT_MS = 15000;      // upper bound on how long to wait for rendering
    var startedAt = Date.now();

    // True once the body holds any non-whitespace text.
    // NOTE(review): assumes the un-rendered page has an empty body text; confirm for this site.
    function hasRenderedText() {
        return page.evaluate(function () {
            var body = document.body;
            return !!body && body.innerText.replace(/\s+/g, '').length > 0;
        });
    }

    // Logs the page's text and markup from inside the page context, then
    // terminates PhantomJS with the given exit code.
    function dumpAndExit(exitCode) {
        page.evaluate(function () {
            console.log('innerText is:' + window.document.body.innerText);
            console.log('outerHTML is: ' + document.documentElement.outerHTML);
        });
        phantom.exit(exitCode);
    }

    // Re-checks the page until it has text or the time budget is spent.
    function poll() {
        if (hasRenderedText()) {
            dumpAndExit(0);
            return;
        }
        if (Date.now() - startedAt >= MAX_WAIT_MS) {
            console.log('Timed out after ' + MAX_WAIT_MS + ' ms waiting for page content');
            dumpAndExit(1);
            return;
        }
        window.setTimeout(poll, POLL_INTERVAL_MS);
    }

    if (status !== 'success') {
        console.log( 'Unable to access network' );
        // page.reason / page.reason_url are filled in by the onResourceError handler.
        if (page.reason) {
            console.log('Reason: ' + page.reason + ' (' + page.reason_url + ')');
        }
        // Without an explicit exit PhantomJS keeps running after a failed load.
        phantom.exit(1);
        return;
    }

    window.setTimeout(poll, POLL_INTERVAL_MS);
});
}
catch(e) {
console.log("Error: " + e.description);
}