在 Android 中从网站源代码解析 HTML

时间:2013-11-27 18:54:37

标签: java android parsing

我已经实现了一个基本功能:用户通过我的应用程序搜索疾病,结果中包含症状、解决方案等。目前我使用 WebView 显示整个网站页面,但我的需求是只显示搜索结果。该网站没有提供 API,我必须从网站的 HTML 源代码中提取这些内容。

我该如何实现?感谢您的任何建议。使用的网站:www.webmd.com

/**
 * Sets up the search screen: binds the query field, the search button and the
 * result WebView, and starts a {@code SearchResult} task when the button is
 * pressed with a non-blank query.
 *
 * @param savedInstanceState state passed through to the superclass
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.main);

    editText = (EditText) findViewById(R.id.editText1);
    button = (Button) findViewById(R.id.button1);
    webView1 = (WebView) findViewById(R.id.webView1);

    URL = "http://www.webmd.com/search/search_results/default.aspx?query=";

    button.setOnClickListener(new OnClickListener() {

        @Override
        public void onClick(View v) {
            data = editText.getText().toString();

            // Ignore clicks while the query is empty or whitespace-only.
            String query = data.trim();
            if (query.length() == 0) {
                return;
            }

            try {
                // Percent-encode the whole query so characters such as '&', '#',
                // '+' or non-ASCII text cannot corrupt the URL. URLEncoder writes
                // spaces as '+'; they are mapped to "%20" to keep the form this
                // code has always sent for spaces.
                URL1 = URL + java.net.URLEncoder.encode(query, "UTF-8").replace("+", "%20");
            } catch (java.io.UnsupportedEncodingException e) {
                // UTF-8 support is mandatory on every Java platform.
                throw new IllegalStateException("UTF-8 encoding is not available", e);
            }
            Log.v("URL", URL1);

            new SearchResult().execute();
        }
    });
}

/**
 * Performs an HTTP GET on {@code targetURL} and returns the response body as text.
 *
 * <p>Failures are reported through the declared exceptions instead of being
 * swallowed and turned into a {@code null} return, so callers can tell an empty
 * page from a failed request.
 *
 * @param targetURL absolute URL to request
 * @return the response body, or an empty string when the response has no entity
 * @throws ClientProtocolException on an HTTP protocol error
 * @throws IOException on a connection problem, or when {@code targetURL} is
 *         {@code null} or not a valid URI
 */
public String getServerDataGET(String targetURL)
        throws ClientProtocolException, IOException {
    if (targetURL == null) {
        throw new IOException("targetURL must not be null");
    }

    HttpClient client = new DefaultHttpClient();
    try {
        HttpUriRequest request;
        try {
            request = new HttpGet(targetURL);
        } catch (IllegalArgumentException e) {
            // HttpGet rejects malformed URIs with an unchecked exception; surface
            // it as the checked type callers of this method already handle.
            throw new IOException("Invalid URL: " + targetURL, e);
        }
        Log.v("link", targetURL);

        HttpResponse response = client.execute(request);
        String responseBody = "";
        HttpEntity entity = response.getEntity();

        if (entity != null) {
            // "UTF-8" is only the fallback used when the response does not
            // declare a charset of its own.
            responseBody = EntityUtils.toString(entity, "UTF-8");
            Log.v("test", responseBody);
        }

        return responseBody;
    } finally {
        // Release the connections held by this one-shot client.
        client.getConnectionManager().shutdown();
    }
}
/**
 * Background task that downloads the search page for the current {@code URL1},
 * extracts the {@code div} whose id is {@code searchResults}, and shows only
 * that fragment in {@code webView1}.
 *
 * <p>A progress dialog is shown while the request is running. When the download
 * fails or the page contains no such element, nothing is loaded into the WebView
 * and the condition is logged.
 */
private class SearchResult extends AsyncTask<Void, Void, Void>{
    /** HTML of the extracted results element; stays {@code null} if the fetch fails. */
    String result1;

    /** Snapshot of {@code URL1} taken on the UI thread so later clicks cannot change it mid-request. */
    private String requestUrl;

    @Override
    protected void onPreExecute() {
        super.onPreExecute();
        requestUrl = URL1;
        mProgressDialog = new ProgressDialog(MainActivity.this);
        mProgressDialog.setTitle("WebMD");
        mProgressDialog.setMessage("Loading...");
        mProgressDialog.setIndeterminate(false);
        mProgressDialog.show();
    }

    @Override
    protected Void doInBackground(Void... params) {
        try {
            // Fetch and parse the page once; a second raw download is not needed.
            Document document = Jsoup.connect(requestUrl).get();
            Elements searchResults = document
                    .select("div[id=searchResults]");
            // Take the element's own markup. attr("searchResults") would look up
            // an attribute of that name, which does not exist, and yield "".
            result1 = searchResults.outerHtml();
        } catch (IOException e) {
            Log.e("document", "Failed to load " + requestUrl, e);
        }
        return null;
    }

    @Override
    protected void onPostExecute(Void result) {
        mProgressDialog.dismiss();
        if (result1 == null || result1.length() == 0) {
            Log.w("document", "No search results to display for " + requestUrl);
            return;
        }
        // Supplying the page URL as base lets relative links and images inside
        // the extracted fragment resolve against the original site.
        webView1.loadDataWithBaseURL(requestUrl, result1, "text/html", "UTF-8", null);
    }

}

}

1 个答案:

答案 0 :(得分:0)

要从 WebView 获取网页的 HTML,您需要创建一个 JavaScript 接口,然后在页面加载完成后通过它把 HTML 传回 Java 代码。下面是一个示例:

/**
 * Loads a page in a new WebView and, once loading has finished, passes the
 * page's HTML to {@code MyJavaScriptInterface.showHTML} through a JavaScript
 * bridge registered under the name {@code HtmlViewer}.
 *
 * @param context context used to create the WebView and the bridge object
 */
public static void loadWebpage(final Context context) {
    WebView view = new WebView(context);

    // JavaScript must be enabled for the page to call back into the bridge.
    // NOTE(review): exposing a bridge object to pages you do not control has
    // security implications on older API levels — confirm against the target SDK.
    view.getSettings().setJavaScriptEnabled(true);
    view.addJavascriptInterface(new MyJavaScriptInterface(context), "HtmlViewer");

    view.setWebViewClient(new WebViewClient() {
        @Override
        public void onPageFinished(WebView view, String url) {
            // innerHTML of the <html> element already holds <head> and <body>,
            // so it is wrapped in <html> tags to rebuild a well-formed document.
            view.loadUrl("javascript:window.HtmlViewer.showHTML("
                    + "'<html>'+document.getElementsByTagName('html')[0].innerHTML+'</html>');");
        }
    });

    view.loadUrl("your url");
}

/**
 * Bridge object handed to {@code WebView.addJavascriptInterface}; the page's
 * JavaScript calls {@link #showHTML(String)} to deliver its HTML.
 */
static class MyJavaScriptInterface {
    /** Context supplied at construction time; not read anywhere in this class yet. */
    private final Context context;

    MyJavaScriptInterface(Context ctx) {
        this.context = ctx;
    }

    /**
     * Receives the HTML sent from the page.
     *
     * NOTE: when the target API level is above 16, this method must be
     * annotated with @JavascriptInterface.
     *
     * @param html the HTML string passed in by the page's JavaScript
     */
    @SuppressWarnings("UnusedDeclaration")
    public void showHTML(String html) {
        // TODO: process the received HTML as needed
    }
}