Question

我想从一个网址获取完整的网页内容，但一直没有成功，有人能帮帮我吗？

这是我的代码：

/**
 * Small blocking helper that downloads the body of a URL with the Apache
 * HttpClient API.
 */
public class HttpHelper {
    /** Connection-establishment timeout handed to HttpConnectionParams (milliseconds). */
    private static final int CONNECTION_TIMEOUT = 30000;
    /** Socket read timeout handed to HttpConnectionParams (milliseconds). */
    private static final int SOCKET_TIMEOUT = 10000;
    /** Logcat tag used for every message emitted by this class. */
    private static final String TAG = "html";
    /** Charset used to decode the response bytes into a String. */
    private static final String BODY_CHARSET = "UTF-8";

    /**
     * Issues an HTTP GET request and returns the response body as text.
     *
     * <p>The call is blocking and performs network I/O on the calling thread.
     *
     * @param uri the address to fetch
     * @return the response body decoded as UTF-8, or an empty string when the
     *         server does not answer with 200 OK, sends no body, or the
     *         request fails with an I/O error (the failure is logged)
     */
    public static String GET(String uri) {
        HttpParams params = new BasicHttpParams();
        HttpConnectionParams.setConnectionTimeout(params, CONNECTION_TIMEOUT);
        HttpConnectionParams.setSoTimeout(params, SOCKET_TIMEOUT);
        HttpClient httpclient = new DefaultHttpClient(params);
        String data = "";
        try {
            HttpResponse response = httpclient.execute(new HttpGet(uri));
            StatusLine statusLine = response.getStatusLine();

            if (statusLine.getStatusCode() != HttpStatus.SC_OK) {
                // Report the failure instead of throwing an exception that
                // would only be caught again a few lines below.
                Log.w(TAG, "GET " + uri + " failed: " + statusLine.getStatusCode()
                        + " " + statusLine.getReasonPhrase());
            } else if (response.getEntity() == null) {
                Log.w(TAG, "GET " + uri + " returned no body");
            } else {
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                response.getEntity().writeTo(out);
                // Decode with an explicit charset rather than the implicit
                // platform default.
                data = out.toString(BODY_CHARSET);
                // Logcat splits (and may truncate) very long messages, so the
                // page can show up as several log entries; the returned
                // string is still complete.
                Log.i(TAG, data);
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so one handler covers
            // both; log it so failures are no longer silent.
            Log.e(TAG, "GET " + uri + " failed", e);
        } finally {
            // Release the sockets held by this single-use client, on success
            // and on failure alike.
            httpclient.getConnectionManager().shutdown();
        }
        return data;
    }

}

在Logcat中，它写道：

01-08 20:56:00.125: I/html(1248): <!doctype html><html itemscope="" itemtype="http://schema.org/WebPage"><head><meta itemprop="image" content="/images/google_favicon_128.png"><title>Google</title><script>(function(){

01-08 20:56:00.125: I/html(1248): window.google={kEI:"20rNUv2oKOLpiAeioYHADA",getEI:function(a){for(var b;a&&(!a.getAttribute||!(b=a.getAttribute("eid")));)a=a.parentNode;return b||google.kEI},https:function(){return"https:"==window.location.protocol},kEXPI:"17259,4000116,4005204,4007278,4007661,4007830,4008067,4008133,4008142,4009033,4009103,4009352,4009565,4009641,4010077,4010806,4010858,4010899,4011063,4011228,4011258,4011524,4011559,4011863,4012001,4012096,4012190,4012275,4012316,4012318,4012320,4012342,4012365,4012415,4012427,4012512,4012523,4012526,4012538,4012547,4012691,4012836,4012851,4012869,4013104,4013140,4013312,4013374,4013378,4013414,4013416,4013433,4013474,4013513,4013567,4013667,4013668,4013669,4013671,4013672,4013701,4013723,4013782,4013787,4013794,4013838,4013842,4013846,4013853,4013920,4013960,4013979,4014054,4014057,4014097,4014179,4014215,4014316,8500073,8500075",kCSI:{e:"17259,4000116,4005204,4007278,4007661,4007830,4008067,4008133,4008142,4009033,4009103,4009352,4009565,4009641,4010077,4010806,4010858,4010899,4011063,4011228,4011258,4011524,4011559,4011863,4012001,4012096,4012190,4012275,4012316,4012318,4012320,4012342,4012365,4012415,4012427,4012512,4012523,4012526,4012538,4012547,4012691,4012836,4012851,4012869,4013104,4013140,4013312,4013374,4013378,4013414,4013416,4013433,4013474,4013513,4013567,4013667,4013668,4013669,4013671,4013672,4013701,4013723,4013782,4013787,4013794,4013838,4013842,4013846,4013853,4013920,4013960,4013979,4014054,4014057,4014097,4014179,4014215,4014316,8500073,8500075",ei:"20rNUv2oKOLpiAeioYHADA"},authuser:0,ml:function(){},kHL:"zh-CN",time:function(){return(new Date).getTime()},log:function(a,b,c,l,k){var d=new Image,f=google.lc,e=google.li,g="",h="gen_204";k&&(h=

01-08 20:56:00.125: I/html(1248): k);d.onerror=d.onload=d.onabort=function(){delete f[e]};f[e]=d;c||-1!=b.search("&ei=")||(g="&ei="+google.getEI(l));c=c||"/"+h+"?atyp=i&ct="+a+"&cad="+b+g+"&zx="+google.time();

01-08 20:56:00.125: I/html(1248): a=/^http:/i;a.test(c)&&google.https()?(google.ml(Error("GLMM"),!1,{src:c}),delete f[e]):(d.src=c,google.li=e+1)},lc:[],li:0,Toolbelt:{},y:{},x:function(a,b){google.y[a.id]=[a,b];return!1},load:function(a,b,c){google.x({id:a+

01-08 20:56:00.125: I/html(1248): m++},function(){google.load(a,b,c)})}};var m=0;})();

01-08 20:56:00.125: I/html(1248): (function(){google.sn="webhp";google.timers={};google.startTick=function(a,b){google.timers[a]={t:{start:google.time()},bfr:!!b}};google.tick=function(a,b,g){google.timers[a]||google.startTick(a);google.timers[a].t[b]=g||google.time()};google.startTick("load",!0);

01-08 20:56:00.125: I/html(1248): try{}catch(d){}})();

01-08 20:56:00.125: I/html(1248): var _gjwl=location;function _gjuc(){var a=_gjwl.href.indexOf("#");if(0<=a&&(a=_gjwl.href.substring(a),0<a.indexOf("&q=")||0<=a.indexOf("#q="))&&(a=a.substring(1),-1==a.indexOf("#"))){for(var d=0;d<a.length;){var b=d;"&"==a.charAt(b)&&++b;var c=a.indexOf("&",b);-1==c&&(c=a.length);b=a.substring(b,c);if(0==b.indexOf("fp="))a=a.substring(0,d)+a.substring(c,a.length),c=d;else if("cad=h"==b)return 0;d=c}_gjwl.href="/search?"+a+"&cad=h";return 1}return 0}

01-08 20:56:00.125: I/html(1248): function _gjh(){!_gjuc()&&window.google&&google.x&&google.x({id:"GJH"},function(){google.nav&&google.nav.gjh&&google.nav.gjh()})};

01-08 20:56:00.125: I/html(1248): window._gjh&&_gjh();</script><style>#gb{font:13px/27px Arial,sans-serif;height:30px}#gbz,#gbg{position:absolute;white-space:nowrap;top:0;height:30px;z-index:1000}#gbz{left:0;padding-left:4px}#gbg{right:0;padding-right:5px}#gbs{background:transparent;position:absolute;top:-999px;visibility:hidden;z-index:998;right:0}.gbto #gbs{background:#fff}#gbx3,#gbx4{background-color:#2d2d2d;background-image:none;_background-image:none;background-position:0 -138px;background-repeat:repeat-x;border-bottom:1px solid #000;font-size:24px;height:29px;_height:30px;opacity:1;filter:alpha(opacity=100);position:absolute;top:0;width:100%;z-index:990}#gbx3{left:0}#gbx4{right:0}#gbb{position:relative}#gbbw{left:0;position:absolute;top:30px;width:100%}.gbtcb{position:absolute;visibility:hidden}#gbz .gbtcb{right:0}#gbg .gbtcb{left:0}.gbxx{display:none !important}.g

我无法理解，为什么同一个页面的内容会被分成很多条日志输出，而且最后没有输出完整。

顺便说一句，我的测试网址是“http://www.google.com”

Answer 1

Logcat会在没有您请求的情况下执行以下操作：

如果你给它一个长字符串，它会自动将它分成多个logcat输出消息。
如果你给它一个非常长的字符串，它会在超过一定字符数之后将其截断。我不知道确切的数字，但似乎在4000个字符以上。

例如，您可以查看以下内容：

Answer 2

试试这段代码，

 // Execute the request and read the body line by line as text.
 HttpResponse response = httpclient.execute(httpget);
        // Name the charset explicitly instead of relying on the platform default.
        in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), "UTF-8"));
        // StringBuilder: the buffer never leaves this thread, so the
        // synchronization of StringBuffer is unnecessary.
        StringBuilder sb = new StringBuilder();
        String line;
        String NL = System.getProperty("line.separator");
        while ((line = in.readLine()) != null) {
            // readLine() strips the terminator; re-add the platform separator.
            sb.append(line).append(NL);
        }
        in.close();
        String data = sb.toString();
        // Logcat may split or truncate this message when the page is long.
        Log.i("html", data);

希望这有帮助

我无法从Android API 16中的网址获取所有内容

2 个答案: