jsoup教程抛出java.lang.IllegalArgumentException

时间:2014-09-05 02:21:51

标签: java jsoup

我正在关注Jsoup示例,试图了解它是如何工作的,并按原样复制并粘贴所有代码,但我不断收到错误'线程中的异常“main”java.lang.IllegalArgumentException:http://news.ycombinator.com/ at org.jsoup.helper.Validate.isTrue(Validate.java:45)     在jsoup.examples.ListLinks.main(ListLinks.java:21)'。 IDE是NetBeans。错误在哪里?

package jsoup.examples;
import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
* Example program to list links from a URL.
*/
public class ListLinks {
    public static void main(String[] args) throws IOException {
       Validate.isTrue(args.length == 1, "http://news.ycombinator.com/");
       String url = args[0];
       print("Fetching %s...", url);

    Document doc = Jsoup.connect(url).get();
    Elements links = doc.select("a[href]");
    Elements media = doc.select("[src]");
    Elements imports = doc.select("link[href]");

    print("\nMedia: (%d)", media.size());
    for (Element src : media) {
        if (src.tagName().equals("img"))
            print(" * %s: <%s> %sx%s (%s)",
                    src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"),
                    trim(src.attr("alt"), 20));
        else
            print(" * %s: <%s>", src.tagName(), src.attr("abs:src"));
    }

    print("\nImports: (%d)", imports.size());
    for (Element link : imports) {
        print(" * %s <%s> (%s)", link.tagName(),link.attr("abs:href"), link.attr("rel"));
    }

    print("\nLinks: (%d)", links.size());
    for (Element link : links) {
        print(" * a: <%s>  (%s)", link.attr("abs:href"), trim(link.text(), 35));
    }
}

private static void print(String msg, Object... args) {
    System.out.println(String.format(msg, args));
}

private static String trim(String s, int width) {
    if (s.length() > width)
        return s.substring(0, width-1) + ".";
    else
        return s;
   }
}

1 个答案:

答案 0 :(得分:0)

返回的响应代码是403,这意味着禁止。

尝试在执行get之前设置用户代理。类似的东西:

Document doc = Jsoup.connect(url).userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").get();

上述更改的代码对我有用。