试图用HtmlCleaner和XPath解析html

时间:2013-09-22 09:23:22

标签: java android xpath html-parsing htmlcleaner

我正在尝试从某个网站(this is the website)获取表数据,我试图通过尝试获取某个节点来尝试它。这是我的尝试:

public class ScheudeleWithDesign extends Activity {

static final String urlToParse = "https://www.easistent.com/urniki/263/razredi/18221";
static final String xpathTableContents = "//div[@id='text11']/td/tr";
TextView tw1;

@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_scheudele_with_design);

    tw1 = (TextView) findViewById(R.id.urnikText);

    String value = "";

    try {
        value = getScheudele();
        tw1.setText(value);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (XPatherException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}//End of onCreate

public String getScheudele() throws IOException, XPatherException{
    String stats = null;

    //cleaner properties
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties props = cleaner.getProperties();
    props.setAllowHtmlInsideAttributes(false);
    props.setAllowMultiWordAttributes(false);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);

    //URL object
    URL url = new URL(urlToParse);

    //HTML page root node
    TagNode root = cleaner.clean(url);

    //query XPath
    Object[] node = root.evaluateXPath(xpathTableContents);

    //Vzemi podatke če najdeš element
    if (node.length > 0) {
        TagNode resultNode = (TagNode)node[0];
        stats = resultNode.getText().toString();
    }

    return stats;
}

这不起作用,应用程序崩溃,这就是logcat:

09-22 11:22:08.632: E/AndroidRuntime(29385): FATAL EXCEPTION: main
09-22 11:22:08.632: E/AndroidRuntime(29385): java.lang.RuntimeException: Unable to start activity ComponentInfo{com.whizzapps.stpsurniki/com.whizzapps.stpsurniki.ScheudeleWithDesign}: android.os.NetworkOnMainThreadException
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.app.ActivityThread.performLaunchActivity(ActivityThread.java:2211)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.app.ActivityThread.handleLaunchActivity(ActivityThread.java:2261)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.app.ActivityThread.access$600(ActivityThread.java:141)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.app.ActivityThread$H.handleMessage(ActivityThread.java:1256)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.os.Handler.dispatchMessage(Handler.java:99)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.os.Looper.loop(Looper.java:137)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.app.ActivityThread.main(ActivityThread.java:5103)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at java.lang.reflect.Method.invokeNative(Native Method)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at java.lang.reflect.Method.invoke(Method.java:525)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at com.android.internal.os.ZygoteInit$MethodAndArgsCaller.run(ZygoteInit.java:737)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:553)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at dalvik.system.NativeStart.main(Native Method)
09-22 11:22:08.632: E/AndroidRuntime(29385): Caused by: android.os.NetworkOnMainThreadException
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.os.StrictMode$AndroidBlockGuardPolicy.onNetwork(StrictMode.java:1133)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at java.net.InetAddress.lookupHostByName(InetAddress.java:385)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at java.net.InetAddress.getAllByNameImpl(InetAddress.java:236)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at java.net.InetAddress.getAllByName(InetAddress.java:214)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpConnection.<init>(HttpConnection.java:70)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpConnection.<init>(HttpConnection.java:50)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpConnection$Address.connect(HttpConnection.java:340)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpConnectionPool.get(HttpConnectionPool.java:87)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpConnection.connect(HttpConnection.java:128)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpEngine.openSocketConnection(HttpEngine.java:316)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpsURLConnectionImpl$HttpsEngine.makeSslConnection(HttpsURLConnectionImpl.java:461)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpsURLConnectionImpl$HttpsEngine.connect(HttpsURLConnectionImpl.java:433)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpEngine.sendSocketRequest(HttpEngine.java:290)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpEngine.sendRequest(HttpEngine.java:240)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpURLConnectionImpl.getResponse(HttpURLConnectionImpl.java:282)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpURLConnectionImpl.getInputStream(HttpURLConnectionImpl.java:177)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at libcore.net.http.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:271)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at java.net.URL.openStream(URL.java:462)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at org.htmlcleaner.Utils.readUrl(Utils.java:63)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at org.htmlcleaner.HtmlCleaner.clean(HtmlCleaner.java:373)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at org.htmlcleaner.HtmlCleaner.clean(HtmlCleaner.java:387)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at com.whizzapps.stpsurniki.ScheudeleWithDesign.getScheudele(ScheudeleWithDesign.java:63)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at com.whizzapps.stpsurniki.ScheudeleWithDesign.onCreate(ScheudeleWithDesign.java:36)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.app.Activity.performCreate(Activity.java:5133)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.app.Instrumentation.callActivityOnCreate(Instrumentation.java:1087)
09-22 11:22:08.632: E/AndroidRuntime(29385):    at android.app.ActivityThread.performLaunchActivity(ActivityThread.java:2175)
09-22 11:22:08.632: E/AndroidRuntime(29385):    ... 11 more

1 个答案:

答案 0 :(得分:0)

  

应用程序尝试执行时抛出的异常   在其主线程上进行网络操作。

     

这仅适用于针对Honeycomb SDK或。的应用程序   更高。允许使用早期SDK版本的应用程序   他们的主要事件循环线程上的网络,但它是很重要的   泄气

来源:http://developer.android.com/reference/android/os/NetworkOnMainThreadException.html

解决方案#1:

在separeate线程中执行网络调用,尝试使用AsyncTask。

解决方案#2:

您可以使用StrictMode对象来允许'主线程上的网络'。

if (android.os.Build.VERSION.SDK_INT > 9) {
    StrictMode.ThreadPolicy policy = new StrictMode.ThreadPolicy.Builder().permitAll().build();
    StrictMode.setThreadPolicy(policy);
}

P.S。:只有在您完全理解此解决方案的副作用时才使用它。 您可以找到有关StrictMode here的更多信息。