用于抓取网页的简单 Java 代码无法正常工作

时间:2013-05-05 09:47:07

标签: java url web-crawler malformedurlexception

    /**
     * Walks a fixed list of site addresses, echoing each address and then every
     * line read from it to standard output.
     */
    public class Crawler {
        public static void main(String[] args) {
            List<String> sites = new ArrayList<String>();
            sites.add("www.thehindu.com");
            sites.add("www.indianexpress.com");
            sites.add("www.ndtv.com");
            sites.add("www.tehekla.com");

            try {
                for (String address : sites) {
                    // A hard-coded new URL("http://www.thehindu.com/") was tried here earlier.
                    System.out.println(address);
                    URL siteUrl = new URL(address);
                    InputStreamReader rawReader = new InputStreamReader(siteUrl.openStream());
                    BufferedReader pageReader = new BufferedReader(rawReader);
                    // Echo the page line by line until the stream is exhausted.
                    for (String line = pageReader.readLine(); line != null; line = pageReader.readLine()) {
                        System.out.println(line);
                    }
                }
            } catch (Exception failure) {
                // The catch sits outside the loop, so the first failure ends the whole run.
                failure.printStackTrace();
            }
        }
    }

当我尝试运行此代码时,错误显示为:

java.net.MalformedURLException: no protocol: www.thehindu.com 

2 个答案:

答案 0 :(得分:3)

尝试在每个网址前添加http://

答案 1 :(得分:0)

您需要在每个网站地址之前加上协议前缀 http://,如下所示:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Minimal page fetcher: downloads each address and echoes its content to
 * standard output.
 *
 * <p>Every address must carry an explicit protocol such as {@code http://};
 * without one, {@code new URL(String)} throws
 * {@code MalformedURLException: no protocol}.
 */
public class Crawler {

    /** Addresses fetched when no command-line arguments are supplied. */
    private static final String[] DEFAULT_SITES = {
        "http://www.thehindu.com",
        "http://www.indianexpress.com",
        "http://www.ndtv.com",
        "http://www.tehekla.com"
    };

    /**
     * Prints each address followed by the lines of the content found there.
     *
     * <p>A failure on one address is reported on standard error and does not
     * prevent the remaining addresses from being fetched.
     *
     * @param args absolute URLs to fetch; when empty or {@code null}, the
     *             built-in default list is used
     */
    public static void main(String[] args) {
        String[] requested = (args == null || args.length == 0) ? DEFAULT_SITES : args;
        List<String> sites = new ArrayList<String>(requested.length);
        for (String site : requested) {
            sites.add(site);
        }

        for (String address : sites) {
            System.out.println(address);
            // Handle errors per address so one bad or unreachable site
            // does not abort the whole run.
            try {
                printContent(address);
            } catch (IOException ex) {
                System.err.println("Failed to fetch " + address + ": " + ex);
            }
        }
    }

    /**
     * Opens the given address and writes its content to standard output line by line.
     *
     * @param address absolute URL, including the protocol
     * @throws IOException if the address is malformed (for example, missing
     *                     its protocol) or the content cannot be read
     */
    private static void printContent(String address) throws IOException {
        URL url = new URL(address);
        // try-with-resources closes the reader and the underlying stream even
        // when reading fails. The content is decoded as UTF-8 rather than the
        // platform default so output does not vary between machines.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}