我正在尝试以负责任的方式从 just-eat.co.uk 抓取一些数据。
使用 WebClient 请求时,我得到的页面中包含一段来自 Incapsula 的 JavaScript 代码,用于检查我是不是机器人:
// Bootstrap snippet served with the page: `b` holds a script encoded as
// two-digit hexadecimal character codes (payload elided in this excerpt).
var b="...DATAENCODED...";
// `z` accumulates the decoded character codes as a comma-separated list.
// It has to start out as an empty string: reading an unassigned `z` in the
// loop below would throw a ReferenceError on the first iteration.
var z="";
for (var i=0;i<b.length;i+=2){
    // Each pair of hex digits is one character code.
    z=z+parseInt(b.substring(i, i+2), 16)+",";
}
// Drop the trailing comma left by the loop.
z = z.substring(0,z.length-1);
// The inner eval turns the code list into the script text via
// String.fromCharCode(...); the outer eval then executes that text.
// NOTE(review): this runs code received from the server through eval; it is
// kept as-is because it reproduces what the page does — decode and inspect
// the payload instead of executing it when analysing untrusted pages.
eval(eval('String.fromCharCode('+z+')'));
对 eval(...) 中的内容进行解码后,我得到了以下代码:
// Decoded challenge script. It issues a synchronous GET to an
// /_Incapsula_Resource URL, tracks how far the request got in `status` and
// `timing`, reloads the parent page when the request finishes with HTTP 200,
// and reports progress through an image beacon on unload or on any exception.
try {
    var xhr;
    var t = new Date().getTime();   // start timestamp; later values are offsets from it
    var status = "start";           // most recent progress description
    var timing = new Array(3);      // [0] "s:" before open, [1] "c:" on completion, [2] "r:" on unload

    // When the page unloads, record the elapsed time and send the collected
    // status/timing string by requesting an image URL.
    window.onunload = function () {
        timing[2] = "r:" + (new Date().getTime() - t);
        var beacon = document.createElement("img");
        beacon.src = "/_Incapsula_Resource?XXXXXXX=52&t=63&d=" + encodeURIComponent(status + " (" + timing.join() + ")");
    };

    // Use the native XMLHttpRequest when present, otherwise the ActiveX one.
    xhr = window.XMLHttpRequest ? new XMLHttpRequest() : new ActiveXObject("Microsoft.XMLHTTP");

    xhr.onreadystatechange = function () {
        // Progress messages for readyState 0 through 3, indexed by state.
        var progressLabels = [
            ": request not initialized ",
            ": server connection established",
            ": request received",
            ": processing request"
        ];
        var state = xhr.readyState;

        if (state === 4) {
            // Request finished: note the completion time and, on HTTP 200,
            // reload the parent document.
            status = "complete";
            timing[1] = "c:" + (new Date().getTime() - t);
            if (xhr.status == 200) {
                parent.location.reload();
            }
            return;
        }

        if (state === 0 || state === 1 || state === 2 || state === 3) {
            status = new Date().getTime() - t + progressLabels[state];
        }
    };

    timing[0] = "s:" + (new Date().getTime() - t);
    // Third argument `false` makes this a synchronous request.
    xhr.open("GET", "/_Incapsula_Resource?SWHANEDL=111...DATA...111,222...DATA...222,333...DATA...333,444...DATA...444", false);
    xhr.send(null);
} catch (err) {
    // Append the failure to the status text and report it through the same
    // image beacon that the unload handler uses.
    status += new Date().getTime() - t + " incap_exc: " + err;
    document.createElement("img").src = "/_Incapsula_Resource?XXXXXXX=52&t=63&d=" + encodeURIComponent(status + " (" + timing.join() + ")");
}
有没有办法用 WebClient 解决这个问题?这可能行不通,因为该页面需要运行 JavaScript。有没有一种工具,可以在 Chrome 之类的浏览器中发送请求,并通过保存浏览器渲染出的内容来获取结果?