无法使用jsoup检索完整的网页,可能的修复?

时间:2016-09-14 06:14:07

标签: java android jsoup

<html><head><META NAME="robots" CONTENT="noindex,nofollow"><script>(function(){function getSessionCookies(){var cookieArray=new Array();var cName=/^\s?incap_ses_/;var c=document.cookie.split(";");for(var i=0;i<c.length;i++){var key=c[i].substr(0,c[i].indexOf("="));var value=c[i].substr(c[i].indexOf("=")+1,c[i].length);if(cName.test(key)){cookieArray[cookieArray.length]=value}}return cookieArray}function setIncapCookie(vArray){var res;try{var cookies=getSessionCookies();var digests=new Array(cookies.length);for(var i=0;i<cookies.length;i++){digests[i]=simpleDigest((vArray)+cookies[i])}res=vArray+",digest="+(digests.join())}catch(e){res=vArray+",digest="+(encodeURIComponent(e.toString()))}createCookie("___utmvc",res,20)}function simpleDigest(mystr){var res=0;for(var i=0;i<mystr.length;i++){res+=mystr.charCodeAt(i)}return res}function createCookie(name,value,seconds){var expires="";if(seconds){var date=new Date();date.setTime(date.getTime()+(seconds*1000));var expires="; expires="+date.toGMTString()}document.cookie=name+"="+value+expires+"; path=/"}function test(o){var res="";var vArray=new Array();for(var j=0;j<o.length;j++){var test=o[j][0];switch(o[j][1]){case"exists":try{if(typeof(eval(test))!="undefined"){vArray[vArray.length]=encodeURIComponent(test+"=true")}else{vArray[vArray.length]=encodeURIComponent(test+"=false")}}catch(e){vArray[vArray.length]=encodeURIComponent(test+"=false")}break;case"value":try{try{res=eval(test);if(typeof(res)==="undefined"){vArray[vArray.length]=encodeURIComponent(test+"=undefined")}else if(res===null){vArray[vArray.length]=encodeURIComponent(test+"=null")}else{vArray[vArray.length]=encodeURIComponent(test+"="+res.toString())}}catch(e){vArray[vArray.length]=encodeURIComponent(test+"=cannot evaluate");break}break}catch(e){vArray[vArray.length]=encodeURIComponent(test+"="+e)}case"plugin_extentions":try{var extentions=[];try{i=extentions.indexOf("i")}catch(e){vArray[vArray.length]=encodeURIComponent("plugin_ext=indexOf is not a 
function");break}try{var num=navigator.plugins.length if(num==0||num==null){vArray[vArray.length]=encodeURIComponent("plugin_ext=no plugins");break}}catch(e){vArray[vArray.length]=encodeURIComponent("plugin_ext=cannot evaluate");break}for(var i=0;i<navigator.plugins.length;i++){if(typeof(navigator.plugins[i])=="undefined"){vArray[vArray.length]=encodeURIComponent("plugin_ext=plugins[i] is undefined");break}var filename=navigator.plugins[i].filename var ext="no extention";if(typeof(filename)=="undefined"){ext="filename is undefined"}else if(filename.split(".").length>1){ext=filename.split('.').pop()}if(extentions.indexOf(ext)<0){extentions.push(ext)}}for(i=0;i<extentions.length;i++){vArray[vArray.length]=encodeURIComponent("plugin_ext="+extentions[i])}}catch(e){vArray[vArray.length]=encodeURIComponent("plugin_ext="+e)}break}}vArray=vArray.join();return vArray}var o=[["navigator","exists"],["navigator.vendor","value"],["navigator.appName","value"],["navigator.plugins.length==0","value"],["navigator.platform","value"],["navigator.webdriver","value"],["platform","plugin_extentions"],["ActiveXObject","exists"],["webkitURL","exists"],["_phantom","exists"],["callPhantom","exists"],["chrome","exists"],["yandex","exists"],["opera","exists"],["opr","exists"],["safari","exists"],["awesomium","exists"],["puffinDevice","exists"],["navigator.cpuClass","exists"],["navigator.oscpu","exists"],["navigator.connection","exists"],["window.outerWidth==0","value"],["window.outerHeight==0","value"],["window.WebGLRenderingContext","exists"],["document.documentMode","value"],["eval.toString().length","value"]];try{setIncapCookie(test(o));document.createElement("img").src="/_Incapsula_Resource?SWKMTFSR=1&e="+Math.random()}catch(e){img=document.createElement("img");img.src="/_Incapsula_Resource?SWKMTFSR=1&e="+e}})();</script><script>(function() { var z="";var 
b="7472797B766172207868723B76617220743D6E6577204461746528292E67657454696D6528293B766172207374617475733D227374617274223B7661722074696D696E673D6E65772041727261792833293B77696E646F772E6F6E756E6C6F61643D66756E6374696

这是我在Android中使用Jsoup检索的输出。

这是我用来获取上述输出的代码:

   private class SimpleTask1 extends AsyncTask<String, String, String>
{
    ProgressDialog loader;


    @Override
    protected void onPreExecute()
    {
        loader = new ProgressDialog(MainActivity.this,ProgressDialog.STYLE_SPINNER);
        loader.setMessage("loading engine");
        loader.show();

        //getApplicationContext().progressView.startAnimation();

    }

    protected String doInBackground(String... urls)
    {
        String result1 = "";
        try {



            HttpGet httpGet1 = new HttpGet(urls[0]);
            HttpClient client1 = new DefaultHttpClient();

            HttpResponse response1 = client1.execute(httpGet1);

            int statusCode = response1.getStatusLine().getStatusCode();

            if (statusCode == 200) {
                InputStream inputStream1 = response1.getEntity().getContent();
                BufferedReader reader1 = new BufferedReader
                        (new InputStreamReader(inputStream1));
                String line1;
                while ((line1 = reader1.readLine()) != null)
                {
                    result1 += line1;
                    if(isCancelled())
                        break;

                }
            }



        } catch (ClientProtocolException e) {

        } catch (IOException e) {

        }
        //Log.w("PREMIERE::::",result);
        return result1;
    }

    protected void onPostExecute(String jsonString)
    {
        loader.dismiss();
        showData(jsonString);



    }




}

我需要从中检索数据的网址是this。查看该页面的原始源代码可以看到,其中还有更多内容没有被jsoup检索到。可能的原因是什么,又该如何解决?

PS:我从其他答案中了解到,Jsoup无法读取动态生成的内容,有人建议改用HtmlUnit。但HtmlUnit虽然适用于Java,似乎并不适用于Android。希望解决过类似问题的人能够帮忙。

0 个答案:

没有答案