使用 PhantomJS 捕获 analytics.js 加载后发出的请求

时间:2015-06-18 07:18:48

标签: javascript google-analytics phantomjs

我一直在尝试捕获 analytics.js 在 PhantomJS 无头浏览器中加载后发出的请求。问题在于,analytics.js 是在页面完全加载之后才加载的,因此很难跟踪它发出的请求。我目前尝试的代码如下:

// Page to load and audit.
var url = "http://www.alexandani.com/necklaces/sand-dollar-expandable-necklace.html";

// Audited URL prefixes (compared against request URLs with protocol and query
// string removed) mapped to the numeric value stored with each matching
// request.
var auditlinks = {
    "www.google-analytics.com/analytics.js": 6,
    "metrics.alexandani.com": 9,
    "www.google-analytics.com/collect": 7
};
var auditlink_urls = Object.keys(auditlinks);

// Flag consulted by the request handler when deciding whether to abort a
// matched request.
var block_request = 1;

// Time limits, all in milliseconds.
var execution_timeout = 40000;
var resource_timeout = 50000;
var inactivity_timeout = 50000;
var inactivity_timeout_check_period = 100;

var user_agent = "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) PhantomJS/1.9.0 Safari/534.34";

// The URL whose HTTP status is wanted; updated while following redirects and
// reset to null once the status has been captured.
var page_http_status_target_url = url;
var page_http_status = null;

// Matched requests, appended to as [url, audit value, timestamp] triples.
var requests = [];


/**
 * Write a value to the console as a single JSON string.
 *
 * @param {*} obj - Value to serialise with JSON.stringify.
 */
function print(obj){
    var serialized = JSON.stringify(obj);
    console.log(serialized);
}

/**
 * Build a function that reduces a URL to its host-and-path form.
 *
 * The returned function removes a leading "http://" or "https://"
 * (case-insensitive) and then drops everything from the first "?" onwards,
 * together with any slashes directly in front of that "?".
 *
 * @returns {function(string): string} The URL cleaner.
 */
function create_url_cleaner(){
    var rx_match_protocol = /^(http|https):\/\//i;
    var rx_match_query_params = /\/*\?.*/i;

    return function clean_url(url){
        var without_protocol = url.replace(rx_match_protocol, '');
        return without_protocol.replace(rx_match_query_params, '');
    };
}



/**
 * Report what was collected and terminate PhantomJS with exit code 0.
 *
 * Logs the raw `requests` list, then prints one JSON object holding the
 * requests, the exit reason and the captured HTTP status.
 *
 * @param {string} exit_reason - Label explaining why the run is ending.
 */
function exit(exit_reason){
    console.log(requests);

    // The status is reported as a string, or null when none was captured.
    var http_status = null;
    if (page_http_status !== null){
        http_status = page_http_status.toString();
    }

    var report = {
        'requests': requests,
        'exit_reason': exit_reason,
        'http_status': http_status
    };
    print(report);

    phantom.exit(0);
}



/**
 * Schedule a one-shot timer that ends the run with reason "EXEC_TIMEOUT".
 *
 * @param {number} execution_timeout - Delay passed to setTimeout.
 */
function start_exec_time_limiter(execution_timeout){
    function on_deadline(){
        console.log('hi');
        exit("EXEC_TIMEOUT");
    }

    setTimeout(on_deadline, execution_timeout);
}



/**
 * Start a periodic check that ends the run with reason "INACTIVITY_TIMEOUT"
 * once more than `inactivity_timeout` has elapsed (by Date.now()) since the
 * last registered activity.
 *
 * @param {number} inactivity_timeout - Maximum allowed gap between activities.
 * @param {number} inactivity_timeout_check_period - Interval passed to
 *     setInterval for the periodic check.
 * @returns {function(): void} Call it to mark "activity happened now".
 */
function start_inactivity_tracker(
    inactivity_timeout,
    inactivity_timeout_check_period
){
    // Tracking starts from the moment the tracker is created.
    var last_seen = Date.now();

    setInterval(function check_inactivity(){
        var idle_for = Date.now() - last_seen;

        if (idle_for > inactivity_timeout){
            exit("INACTIVITY_TIMEOUT");
        }
    }, inactivity_timeout_check_period);

    return function register_activity(){
        last_seen = Date.now();
    };
}



// Arm the overall deadline; when its timer fires the run ends with the
// reason "EXEC_TIMEOUT".
start_exec_time_limiter(execution_timeout);

// URL normaliser used when comparing request URLs with the audited prefixes.
var clean_url = create_url_cleaner();
// Start the periodic inactivity check. The returned function is what the
// page's resource handlers call to mark that network activity happened.
var register_activity = start_inactivity_tracker(
    inactivity_timeout, inactivity_timeout_check_period);


// The PhantomJS page whose network traffic is audited.
var page = require('webpage').create();
page.settings.resourceTimeout = resource_timeout;
page.settings.userAgent = user_agent;
//page.injectJs('wait.js');

// Intentionally empty handler: errors raised inside the loaded page's own
// scripts are ignored.
page.onError = function (msg, stack){};

/**
 * Track the HTTP status of the target page, following redirects.
 *
 * Every received response counts as activity. Only a response whose URL
 * equals the currently tracked target is inspected further: a redirect moves
 * the target to the redirect URL, anything else records the status and stops
 * tracking by clearing the target.
 *
 * @param {Object} response - Response metadata; `url`, `redirectURL` and
 *     `status` are read.
 */
page.onResourceReceived = function (response){
    register_activity();

    if (response.url != page_http_status_target_url){
        return;
    }

    if (response.redirectURL){
        page_http_status_target_url = response.redirectURL;
        return;
    }

    page_http_status = response.status;
    page_http_status_target_url = null;
};

/**
 * Record every outgoing request whose cleaned URL starts with one of the
 * audited prefixes and, when blocking is enabled, abort it.
 *
 * Each match is stored in `requests` as [url, audit value, timestamp]. Only
 * the first matching prefix is used for a given request.
 *
 * @param {Object} requestData - Request metadata; only its `url` is read.
 * @param {Object} request - Request handle; `abort()` is called on it when
 *     blocking is enabled.
 */
page.onResourceRequested = function (requestData, request){
    register_activity();
    var timestamp = Date.now();

    var url = requestData.url;
    var bare_url = clean_url(url);

    // Indexed loop instead of `for...in`: `for...in` walks enumerable keys
    // (including anything added to Array.prototype), not just the elements.
    for (var i = 0; i < auditlink_urls.length; i++){
        var alurl = auditlink_urls[i];

        if (bare_url.indexOf(alurl) !== 0){
            continue;
        }

        requests.push([url, auditlinks[alurl], timestamp]);

        // Truthiness check: the flag is configured as the number 1, which a
        // strict `=== true` comparison never matches, so the abort branch was
        // unreachable. Set the flag to 0/false to let matched requests
        // through.
        if (block_request){
            request.abort();
        }

        break;
    }
};

// Start loading the target page. No completion callback is passed; in the
// code shown here the run ends only through the timeout paths that call
// exit().
page.open(url);
//---------------------------------------------------------------------------//

1 个答案:

答案 0 :(得分:0)

为什么需要在 PhantomJS 中加载 analytics.js?如果你只是想测试对 ga() 的调用是否符合预期,只需用桩函数(stub)替换 ga() 函数,并断言它收到的调用与你期望的一致。

实际上,analytics.js 代码片段(snippet)本身只是一个存根:在等待完整的库下载期间,它会把收到的调用存储在 q 属性上。

如果你不下载 analytics.js 脚本,就始终可以通过 ga.q 检查这些调用。