读取网站的HTML代码

时间:2016-01-26 19:48:41

标签: java android web-scraping screen-scraping

我正在尝试读取某个网站的HTML代码,为此使用了下面的代码。这是我的其中一个Fragment:

/**
 * Fragment that inflates the favorites layout and fills one of its TextViews
 * with the text returned by {@code getHtml()} for a fixed URL.
 */
public class FragmentFavorites extends Fragment {
    View view;
    TextView text;
    // Used only to reach getHtml() below.
    // NOTE(review): getHtml() is shown as a static method, so this instance is
    // presumably unnecessary — confirm that getHtml() lives in Homescreen.
    Homescreen home = new Homescreen();
    /**
     * Inflates {@code R.layout.favorites}, looks up {@code R.id.textView2} and
     * sets its text to the HTML fetched from the hard-coded URL.
     *
     * NOTE(review): getHtml() opens a URLConnection and reads it to the end,
     * i.e. it performs blocking network I/O, and it is called directly from
     * this lifecycle callback. onCreateView presumably runs on the UI thread,
     * where Android raises NetworkOnMainThreadException; that is a
     * RuntimeException and would not be caught by the IOException handler
     * below — this looks like the cause of the reported crash; confirm with
     * the logcat stack trace.
     *
     * @return the inflated root view of this fragment
     */
    public View onCreateView(LayoutInflater inflater, ViewGroup container, Bundle savedInstanceState) {
        view = inflater.inflate(R.layout.favorites,container, false);
        text = (TextView) view.findViewById(R.id.textView2);
        try {
            text.setText(home.getHtml("http://pastebin.com/u7jHeNwf"));
        } catch (IOException e) {
            // Only I/O failures are handled here; the TextView is simply left unchanged.
            e.printStackTrace();
        }
        return view;
    }
}

这是我所指的getHtml():

/**
 * Downloads the resource at {@code url} and returns its content as text.
 *
 * <p>The response is read line by line and the lines are concatenated
 * <em>without</em> their line terminators, so the returned string contains no
 * line breaks. The bytes are decoded as UTF-8.
 *
 * <p>This method blocks until the whole response has been read (connect and
 * read timeouts are 5 seconds each), so it must not be called from a thread
 * that is not allowed to block on network I/O.
 *
 * @param url absolute URL of the resource to download
 * @return the response body with line terminators removed; empty if the body is empty
 * @throws IOException if the URL is malformed, the connection cannot be
 *                     established, or reading the response fails
 */
public static String getHtml(String url) throws IOException {
    URLConnection connection = new URL(url).openConnection();
    connection.setConnectTimeout(5000);
    connection.setReadTimeout(5000);
    connection.connect();

    StringBuilder html = new StringBuilder();
    // try-with-resources closes the reader and the underlying stream even when
    // readLine() throws; the original leaked both on a failed read.
    try (InputStream in = connection.getInputStream();
         BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            html.append(line);
        }
    }
    return html.toString();
}

不幸的是,每当我滑动到这个Fragment(也就是调用getHtml())时,我的应用就会停止运行。有人知道我哪里做错了吗?

1 个答案:

答案 0 :(得分:0)

    public class FragmentFavorites extends Fragment {
        View view;
        TextView text;
        Homescreen home = new Homescreen();
        public View onCreateView(LayoutInflater inflater, ViewGroup container, Bundle savedInstanceState) {
            view = inflater.inflate(R.layout.favorites,container, false);
            text = (TextView) view.findViewById(R.id.textView2);
            FetchHtml fetchHtml = new FetchHtml(getActivity().getApplicationContext(), FragmentFavorites.this);
            fetchHtml.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR, "http://pastebin.com/u7jHeNwf");
            return view;
        }

    public static class FetchHtml extends AsyncTask<String, Void, String> {

            Context mContext;
            WeakReference<FragmentFavorites> mClient;

            public RegisterGcmTask(Context context, FragmentFavorites client) {
                this.mContext = context;
                this.mClient = new WeakReference<>(client);
            }

            @Override
            protected String doInBackground(String... params) {
                  try {
                   return getHtml(params[0]);
                } catch (IOException e) {
                   e.printStackTrace();
                   return null;
                }
            }

            @Override
            protected void onPostExecute(String html) {
                super.onPostExecute(token);
                if (null != mClient && null != mClient.get()) {
                    if (null != html) {
                        mClient.get().text.setText(html);
                    } else {
                       mClient.get().text.setText("Error fetching html");
                    }
                }
            }

        private static String getHtml(String url) throws IOException {
            URLConnection connection = (new URL(url)).openConnection();
            connection.setConnectTimeout(5000);
            connection.setReadTimeout(5000);
            connection.connect();

            InputStream in = connection.getInputStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(in));
            StringBuilder html = new StringBuilder();
            for (String line; (line = reader.readLine()) != null; ) {
                html.append(line);
            }
            in.close();

            return html.toString();
        }

    }
}