我有一个 PhantomJS 脚本,它会检查每一次重定向,并通过 page.onNavigationRequested
回调方法把重定向的 URL 显示在控制台中。
但是,当我想把 page.onNavigationRequested
回调方法收到的所有 URL 推送到一个数组中,并在脚本末尾一次性显示所有 URL 时,它只显示了第一个重定向 URL。
请您看一下下面的脚本并给出建议。
// Question script: tries to collect every URL passed to
// page.onNavigationRequested and print them as JSON.
var page = require('webpage').create();
// NOTE(review): sys and fs are not referenced anywhere in the visible script.
var sys = require('system');
var fs = require('fs');
// Report object printed at the end; its `content` property is set to the URL array below.
var response = {};
// URLs received by the onNavigationRequested handler.
var arrayOfResponses = [];
var pageUrl = 'http://example.com/r1.php';
// Handler assigned to phantom.onError: exits with status 1; msg and trace are not reported.
phantom.onError = function (msg, trace) {
phantom.exit(1);
};
// Timer callback (scheduled by the setTimeout call below): exits with status 0.
function forceExit(){
phantom.exit(0);
}
// Appends each requested URL to arrayOfResponses; type, willNavigate and main are unused.
page.onNavigationRequested = function(url, type, willNavigate, main) {
arrayOfResponses.push(url) ;
}
// response.content refers to the same array object, so later pushes are visible through it.
response.content = arrayOfResponses;
// The open callback exits in both branches: status 1 on failure, status 0 on success.
// NOTE(review): exiting here presumably leaves no time for later redirects to be recorded — confirm.
// NOTE(review): the trailing 100 argument does not look like a documented page.open parameter — confirm its intent.
page.open(pageUrl, function(status) {
if ( status !== 'success' ) {
phantom.exit( 1 );
} else {
phantom.exit( 0 );
}
}, 100);
// Fallback: call forceExit after 2000 ms.
setTimeout(forceExit,2000);
// Runs during the initial top-level pass, before the page.open callback or the timer
// callback can fire, so it prints only what has been collected up to this point.
console.log(JSON.stringify(response));
提前感谢您。
答案 0 :(得分:1)
您的脚本存在两个问题:
在第一个网址打开后,您过早地退出了 PhantomJS,它没有时间跟随后续的重定向。
您从上到下编写脚本,就好像程序流程是线性、同步的,但在 JavaScript 中并非如此——onNavigationRequested
是一个回调,可能在之后被多次调用。
因此,考虑到这一点,让我们重写脚本来收集所有重定向,并在 2 秒内没有新的重定向发生时退出。
// Entry URL whose chain of navigation requests is recorded.
var pageUrl = 'http://admin.weeqo.com/redirect/r1.php';

// PhantomJS page object used to load pageUrl.
var page = require('webpage').create();

// Every URL handed to the navigation handler, in arrival order.
var arrayOfResponses = [];

// Report object; its `content` property is filled in just before exiting.
var response = {};

// Handle of the pending shutdown timer (undefined until the first navigation request).
var exitTimeout;
/**
 * Prints the final URL and the list of collected URLs, then exits with status 0.
 * Intended to run once no redirects have been requested for 2 seconds.
 */
function forceExit() {
    // Informational only: ask the currently loaded document for its own address.
    var readHref = function () {
        return document.location.href;
    };
    var finalUrl = page.evaluate(readHref);
    console.log("Final URL is " + finalUrl);

    // Attach the collected URLs to the report object and print it as JSON.
    response.content = arrayOfResponses;
    var report = JSON.stringify(response);
    console.log("List of all requested URLs: " + report);

    // Everything has been reported, so the process can end.
    phantom.exit(0);
}
/**
 * Called before each redirect: records and logs the requested URL, then
 * restarts the 2-second shutdown countdown.
 *
 * @param {string} url - URL of the requested navigation.
 * @param {*} type - Not used by this handler.
 * @param {*} willNavigate - Not used by this handler.
 * @param {*} main - Not used by this handler.
 */
page.onNavigationRequested = function (url, type, willNavigate, main) {
    arrayOfResponses.push(url);
    console.log("Navigation requested: " + url);

    // A new request arrived, so cancel any shutdown that is already pending...
    if (exitTimeout) {
        clearTimeout(exitTimeout);
    }
    // ...and schedule a fresh one: exit in two seconds unless cancelled again.
    exitTimeout = setTimeout(forceExit, 2000);
};
// Start loading the first page.
page.open(pageUrl, function (status) {
    // A successful load needs no action here: the last redirect may still be
    // some unknown time away, so this callback must not end the script.
    if (status === 'success') {
        return;
    }
    // Only a failed load ends the script from this callback, with status 1.
    phantom.exit(1);
});