Java program to read HTML from a website address

Date: 2016-06-30 09:22:18

Tags: java

Hi, I'm trying to work out this program but I keep getting null as the answer. Any help would be appreciated. I can't use any external methods, and I have to declare a static method. Here is my code:

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Scanner;


public class Links {

    private static ArrayList<String> links;

    public static ArrayList<String> getHTMLLinksFromPage(String location) {

        String webpage = location;

        for (int i = 0; i < webpage.length() - 6; i++) {
            if (webpage.charAt(i) == 'h' && webpage.charAt(i + 1) == 'r') {
                for (int k = i; k < webpage.length(); k++) {
                    if (webpage.charAt(k) == '>') {
                        String link = webpage.substring(i + 6, k - 1);
                        links.add(link);
                        // Break the loop
                        k = webpage.length();
                    }
                }
            }
        }
        return links;
    }

    public static void main(String[] args) throws IOException {

        String address = "http://horstmann.com/index.html.";
        URL pageLocation = new URL(address);
        Scanner in = new Scanner(pageLocation.openStream());
        String webpage = in.next();

        ArrayList<String> x = getHTMLLinksFromPage(webpage);
        System.out.println(x);
    }
}

1 Answer:

Answer 0: (score: 0)

There are a few issues with your code:

First of all, you never initialize the ArrayList named links, so it is still null when you try to use it and when you return it.
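A one-line sketch of that fix, keeping your existing field declaration, is to create the list where it is declared:

private static ArrayList<String> links = new ArrayList<>();   // add(...) now works and the method never returns null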

Secondly, you have an extra . at the end of your URL, which causes a FileNotFoundException.

Also, to break out of the inner for loop you should use a break statement rather than forcing k past the end of the string.
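For example, the inner loop from your method could look like this (same variable names as your code, only the exit changed):

for (int k = i; k < webpage.length(); k++) {
    if (webpage.charAt(k) == '>') {
        String link = webpage.substring(i + 6, k - 1);
        links.add(link);
        break;   // leave the inner loop as soon as the closing '>' is found
    }
}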

Thirdly, you are reading the webpage incorrectly. You call scanner.next() only once, which reads just the first token of the page, in this case <?xml. To read the whole page you would need to keep calling scanner.next().
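If you did want to stick with Scanner, here is a rough sketch, just to illustrate the idea, reusing pageLocation from your main method:

// Option 1: accumulate every whitespace-separated token until the stream is exhausted.
Scanner in = new Scanner(pageLocation.openStream());
StringBuilder page = new StringBuilder();
while (in.hasNext()) {
    page.append(in.next()).append(' ');
}
in.close();

// Option 2: the delimiter "\\A" matches only the start of input, so next() returns the entire stream in one call.
Scanner whole = new Scanner(pageLocation.openStream()).useDelimiter("\\A");
String fullPage = whole.hasNext() ? whole.next() : "";
whole.close();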

However, instead of using Scanner, I believe reading with a BufferedReader wrapped around an InputStreamReader will be faster.

So your code should look something like this:



import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

public class Links {

    public static void main(String[] args) throws IOException {

        URL pageLocation = new URL("http://horstmann.com/index.html");
        HttpURLConnection urlConnection = (HttpURLConnection) pageLocation.openConnection(); 
        InputStreamReader inputStreamReader = new InputStreamReader(urlConnection.getInputStream());
        BufferedReader bufferedReader = new BufferedReader(inputStreamReader);

        // Read the whole page line by line into a single string.
        StringBuilder response = new StringBuilder();
        String line;
        while((line = bufferedReader.readLine()) != null) {
            response.append(line);
        }
        bufferedReader.close();

        System.out.println(getHTMLLinksFromPage(response.toString()));
    }

    private static List<String> getHTMLLinksFromPage(final String webPage) {

        List<String> links = new ArrayList<>();

        for(int i = 0; i < webPage.length()-6; i++) {
            // Look for the start of an href attribute.
            if(webPage.charAt(i) == 'h' && webPage.charAt(i+1) == 'r') {
                for(int j = i; j < webPage.length(); j++ ){
                    if(webPage.charAt(j) == '>'){
                        // Take the text between href=" and the quote just before the '>'.
                        String link = webPage.substring(i+6,j-1);
                        links.add(link);
                        break;
                    }
                }
            }
        }
        return links;
    }
}
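As a side note, the extraction itself is still the original character scan, which assumes every link appears exactly as href="...">. Purely as an illustration, and only if your assignment allows java.util.regex (part of the standard library, not an external one), a pattern-based helper tends to be less fragile; the class and method names here are made up for the example:

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HrefExtractor {

    // Matches href="..." or href='...' and captures the value between the quotes.
    private static final Pattern HREF =
            Pattern.compile("href\\s*=\\s*[\"']([^\"']*)[\"']", Pattern.CASE_INSENSITIVE);

    public static List<String> extractLinks(String html) {
        List<String> links = new ArrayList<>();
        Matcher m = HREF.matcher(html);
        while (m.find()) {
            links.add(m.group(1));   // group 1 is the URL inside the quotes
        }
        return links;
    }
}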