使用 file_get_contents 时如何绕过 JavaScript 检测

时间:2017-05-30 01:42:00

标签: javascript php

我正在尝试将页面保存为 PDF 文件:先读取该页面的内容,然后使用 PDF 库生成文件。我遇到的问题是,使用以下代码读取内容时,得到的是“请启用 JavaScript 以查看页面内容”,而不是页面内容。

<?php
    // Fetch a remote page and print its HTML, escaped, so the raw markup
    // can be inspected in the browser.
    $url='https://www.carfax.com/VehicleHistory/p/Report.cfx?vin=1J4RR5GG5BC586221&csearch=0&partner=GAZ_0';
    // file_get_contents() returns the response body as a string, or false on failure
    $lines_string=file_get_contents($url);
    if ($lines_string === false) {
        // Without this guard a failed request would silently print an empty page.
        exit('Failed to fetch ' . htmlspecialchars($url));
    }
    // Print the escaped markup; it could also be saved locally on the server instead
    echo htmlspecialchars($lines_string);
?>

如何绕过这个 JavaScript 错误?或者我应该使用其他方法?

这是我得到的响应:

<html>
<head>
<meta http-equiv="Pragma" content="no-cache"/> 
<meta http-equiv="Expires" content="-1"/> 
<meta http-equiv="CacheControl" content="no-cache"/> 
<meta http-equiv="X-UA-Compatible" content="IE=edge"/> 
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> 
<link rel="shortcut icon" href="data:;base64,iVBORw0KGgo="/> 
<script> (function(){ var securemsg; var dosl7_common; window["bobcmn"] = "11111010101010200000002200000005200000000289128f7a200000096300000000300000000300000006/TSPD/300000008TSPD_101300000005https200000000200000000"; window.jar=!!window.jar;try{(function(){try{var jj,Jj,Lj=1,Zj=1,Sj=1;for(var ij=0;ij<Jj;++ij)Lj+=2,Zj+=2,Sj+=3;jj=Lj+Zj+Sj;window._O===jj&&(window._O=++jj)}catch(Ij){window._O=jj}var oJ=!0;function OJ(J){J&&(oJ=!1,document.cookie="brav=ad");return oJ}function _J(){}OJ(window[_J.name]===_J);OJ("function"!==typeof ie9rgb4);OJ(/\x3c/.test(function(){return"\x3c"})&!/x3d/.test(function(){return"'x3'+'d';"})); var iJ=window.attachEvent||/mobi/i.test(window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),IJ=+new Date+6E5,ol,_l,Il=setTimeout,jL=iJ?3E4:6E3;function JL(){if(!document.querySelector)return!0;var J=+new Date,O=J>IJ;if(O)return OJ(!1);O=_l&&ol+jL<J;O=OJ(O);ol=J;_l||(_l=!0,Il(function(){_l=!1},1));return O}JL();var LL=[17795081,27611931586,1558153217]; function oL(J){J="string"===typeof J?J:J.toString(36);var O=window[J];if(!O.toString)return;var s=""+O;window[J]=function(J,s){_l=!1;return O(J,s)};window[J].toString=function(){return s}}for(var ZL=0;ZL<LL.length;++ZL)oL(LL[ZL]);OJ(!1!==window.jar);(function(){var J={decrypt:function(J){try{return JSON.parse(function(J){J=J.split("l");var O="";for(var s=0;s<J.length;++s)O+=String.fromCharCode(J[s]);return O}(J))}catch(s){}}};return J={configuration:J.decrypt("123l34l97l99l116l105l118l101l34l58l34l110l111l34l44l34l100l101l98l117l103l103l105l110l103l34l58l34l110l111l34l44l34l109l111l100l117l108l101l49l34l58l34l101l110l97l98l108l101l100l34l44l34l109l111l100l117l108l101l50l34l58l34l101l110l97l98l108l101l100l34l44l34l109l111l100l117l108l101l51l34l58l34l101l110l97l98l108l101l100l34l44l34l109l111l100l117l108l101l52l34l58l34l101l110l97l98l108l101l100l34l125")}})(); var 
sL=3;window.Ls={Os:"087ba4d0fa0178004caafc50a30d48046efd9a15f604d0926f4f95da1a85d369a6d1815489a54acc4a49a5998f87f099792ce5cf3c00ed82cb613e80bec837da827a4967e05d64d8670f7d97250745b00db5a2d96701cfc9d19e00ad5ebfd2aff76046976642518c76938888a8f784eed5b5ea881a1e3668f9b030002df03262"};function l(J){return 645>J}function L(J){var O=arguments.length,s=[];for(var S=1;S<O;++S)s.push(arguments[S]-J);return String.fromCharCode.apply(String,s)}function z(J,O){J+=O;return J.toString(36)}(function SL(O){O&&"number"!==typeof O||("number"!==typeof O&&(O=1E3),O=Math.max(O,1),setInterval(function(){SL(O-10)},O))})(JL());})();}catch(x){document.cookie='brav=oex'+x;}finally{ie9rgb4=void(0);};function ie9rgb4(a,b){return a>>b>>0}; })(); </script> 
<script type="text/javascript" src="/TSPD/086821c3deab2000f497f4a10d45047d2c741eba0afdeced26cf36a836d13b181cb57773ccf959f0?type=7"></script>
<noscript>Please enable JavaScript to view the page content.</noscript> 
</head>
<body> </body>
</html>

1 个答案:

答案 0 :(得分:1)

问题在于该网站会在您尝试加载的页面上进行 JavaScript 检测,这意味着它很可能先返回一个基本空白的文档(只包含 noscript 标签和 script 标签),再由脚本加载其余内容。用 cURL 之类的工具无法绕过这一点。相反,您需要一个带有 JavaScript 引擎的无头浏览器,Selenium WebDriver 就是这样一种解决方案。我还找到了这个 gist:https://gist.github.com/evandrix/3694955