使用jsoup从网页抓取数据

时间:2013-08-09 16:54:12

标签: android html-parsing web-scraping screen-scraping jsoup

package com.example.abc;

import java.io.IOException;
import java.net.URL;

import org.jsoup.*;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import android.os.Bundle;
import android.view.View;
import android.widget.Button;
import android.widget.EditText;
import android.widget.TextView;
import android.widget.Toast;
import android.app.Activity;
import android.content.Intent;
import android.view.View.OnClickListener;

public class MainActivity extends Activity {

@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);     

            try {
                String uri="http://www.example.com/";
                Document doc = Jsoup.connect(uri).get();



                Elements lk= doc.getElementsByTag("a");
                String lss=null;
                for(Element links : lk)
                {
                    lss=links.text();
                }

                TextView t4=(TextView)findViewById(R.id.textView2);

                t4.setText(lss);

            } catch (IOException e) {

                e.printStackTrace();
            }


}


}

/ *我也在我的清单文件中给出了,但是当我运行它时我的程序崩溃并且不幸地显示,abc已停止。我是一个在android中报废的初学者。任何帮助都非常感谢* /

/*this is my stack trace.*/

            08-09 18:00:01.033: W/dalvikvm(3106): threadid=1: thread exiting with uncaught exception (group=0x40a71930)
            08-09 18:00:01.094: E/AndroidRuntime(3106): FATAL EXCEPTION: main
            08-09 18:00:01.094: E/AndroidRuntime(3106): java.lang.RuntimeException: Unable to start activity ComponentInfo{com.example.abc/com.example.abc.MainActivity}: android.os.NetworkOnMainThreadException
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.app.ActivityThread.performLaunchActivity(ActivityThread.java:2180)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.app.ActivityThread.handleLaunchActivity(ActivityThread.java:2230)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.app.ActivityThread.access$600(ActivityThread.java:141)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.app.ActivityThread$H.handleMessage(ActivityThread.java:1234)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.os.Handler.dispatchMessage(Handler.java:99)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.os.Looper.loop(Looper.java:137)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.app.ActivityThread.main(ActivityThread.java:5041)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at java.lang.reflect.Method.invokeNative(Native Method)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at java.lang.reflect.Method.invoke(Method.java:511)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at com.android.internal.os.ZygoteInit$MethodAndArgsCaller.run(ZygoteInit.java:793)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:560)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at dalvik.system.NativeStart.main(Native Method)
            08-09 18:00:01.094: E/AndroidRuntime(3106): Caused by: android.os.NetworkOnMainThreadException
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.os.StrictMode$AndroidBlockGuardPolicy.onNetwork(StrictMode.java:1117)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at java.net.InetAddress.lookupHostByName(InetAddress.java:385)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at java.net.InetAddress.getAllByNameImpl(InetAddress.java:236)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at java.net.InetAddress.getAllByName(InetAddress.java:214)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpConnection.<init>(HttpConnection.java:70)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpConnection.<init>(HttpConnection.java:50)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpConnection$Address.connect(HttpConnection.java:340)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpConnectionPool.get(HttpConnectionPool.java:87)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpConnection.connect(HttpConnection.java:128)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpEngine.openSocketConnection(HttpEngine.java:316)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpEngine.connect(HttpEngine.java:311)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpEngine.sendSocketRequest(HttpEngine.java:290)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpEngine.sendRequest(HttpEngine.java:240)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at libcore.net.http.HttpURLConnectionImpl.connect(HttpURLConnectionImpl.java:81)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:425)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:410)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:164)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at org.jsoup.helper.HttpConnection.get(HttpConnection.java:153)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at com.example.abc.MainActivity.onCreate(MainActivity.java:79)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.app.Activity.performCreate(Activity.java:5104)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.app.Instrumentation.callActivityOnCreate(Instrumentation.java:1080)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     at android.app.ActivityThread.performLaunchActivity(ActivityThread.java:2144)
            08-09 18:00:01.094: E/AndroidRuntime(3106):     ... 11 more

1 个答案:

答案 0 :(得分:0)

您使用的网站链接是否存在问题,或者您是否未设置网络权限(uses-permission android:name =&#34; android.permission.INTERNET&#34;)或模拟器或设备你的使用有互联网问题。

您还应该将文本视图移动到try方法上方,如下所示:

protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);     
TextView t4=(TextView)findViewById(R.id.textView2);

尝试{//在此输入代码 }

我在不使用android库/概念的情况下继续使用java,并且没有错误地工作正常。

相关问题