Jsoup点击链接获取更多数据

时间:2018-01-10 22:58:39

标签: jsoup

package vinovisionparis;

import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Fetches pages 0 and 1 of the exhibitor listing URL and prints one line per
 * matched block: a running counter followed by the title, country, description
 * and the text of the last {@code <li>} found inside the block.
 */
public class vinovisionparis {

    /** Listing URL without the trailing page number. */
    private static final String LISTING_URL =
            "https://www.vinovisionparis.com/Catalogue-2018/Liste-des-exposants/(search_on)/all/(limit)/48/(sort)/raisonSociale_asc/(page)/";

    /**
     * Entry point: downloads each listing page and prints the extracted fields.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // NOTE(review): z is shared across pages and the break below only leaves the
        // inner loop. Once z reaches 10 the rest of that page is skipped, and on later
        // pages z is already past 10 so the check never fires again. Confirm whether a
        // per-page or a global limit was intended.
        int z = 0;
        for (int page = 0; page <= 1; page++) {
            Document doc;
            try {
                doc = Jsoup.connect(LISTING_URL + page).get();
            } catch (IOException e) {
                // Previously swallowed, which left doc null and crashed on doc.select().
                System.err.println("Failed to fetch listing page " + page + ": " + e);
                continue;
            }

            Elements newsHeadlines = doc.select("div.block.block-page.catal-ex.mod-catal");
            for (Element headline : newsHeadlines) {
                z++;
                if (z == 10) {
                    break;
                }
                String title = headline.select("div.catal-ex-item-group-title h3  a").text();
                String desc = headline.select("div.catal-ex-item-desc p.catal-ex-item-country").text();
                String text = headline.select("div.catal-ex-item-desc div.catal-ex-item-txt p").text();

                // Elements.last() returns null when nothing matched; fall back to "".
                Element lastItem = headline.select("li").last();
                String loc = (lastItem == null) ? "" : lastItem.text();

                System.out.println(z + " " + title + "  " + desc + " " + text + " " + loc);
            }
        }
    }
}

如何通过标题链接进入详情页,以获取公司网站等其他数据,以及只有点击链接后才能看到的其他详细信息?

1 个答案:

答案 0 :(得分:0)

您可以使用 Element#attr("href") 获取链接的 href(即链接 URL)。

https://jsoup.org/cookbook/extracting-data/attributes-text-html

如果想获得绝对路径,必须使用 Element#attr("abs:href")。可以像下面这样获取详情页面。

/**
 * Fetches page 1 of the exhibitor listing and, for each of the first nine
 * {@code div.catal-ex-item} entries, prints its title, country, description and
 * the company page URL found on the entry's detail page.
 *
 * <p>A failure to load the listing page is reported and ends the method. A
 * failure to load a single detail page is reported, leaves that entry's company
 * URL empty, and does not stop the remaining entries.
 */
public void test1() {
    try {
        Document doc = Jsoup.connect("https://www.vinovisionparis.com/Catalogue-2018/Liste-des-exposants/(search_on)/all/(limit)/48/(sort)/raisonSociale_asc/(page)/1").get();
        Elements newsHeadlines = doc.select("div.catal-ex-item");
        int i = 0;
        for (Element headline : newsHeadlines) {
            i++;
            if (i == 10) {
                break;
            }

            // The same anchor supplies both the title text and the detail-page link.
            Elements aTag = headline.select("div.catal-ex-item-group-title h3  a");
            String title = aTag.text();

            // attr() yields "" when there is no anchor or no resolvable href.
            String companyPageUrl = fetchCompanyPageUrl(aTag.attr("abs:href"));

            String desc = headline.select("div.catal-ex-item-desc p.catal-ex-item-country").text();
            String text = headline.select("div.catal-ex-item-desc div.catal-ex-item-txt p").text();
            System.out.println("----------");
            System.out.println("Title: " + title);
            System.out.println("Desc:"  + desc);
            System.out.println("Text: "+ text);
            System.out.println("CompanyPage: "+ companyPageUrl);
            System.out.println("----------");
        }

    } catch (IOException e) {
        System.err.println("Failed to load listing page: " + e);
    }
}

/**
 * Loads a detail page and returns the absolute URL of its company link.
 *
 * @param detailPageUrl absolute URL of the detail page; may be empty
 * @return the company page URL, or an empty string when {@code detailPageUrl}
 *         is empty, the page cannot be loaded, or no company link is present
 */
private String fetchCompanyPageUrl(String detailPageUrl) {
    if (detailPageUrl.isEmpty()) {
        // Jsoup.connect rejects an empty URL, so there is nothing to fetch.
        return "";
    }
    try {
        Document detailPage = Jsoup.connect(detailPageUrl).get();
        return detailPage.select("a.catal-ed-main-url-link.catal-ed-url-link").attr("abs:href");
    } catch (IOException e) {
        // Keep going with the other entries instead of aborting the whole listing.
        System.err.println("Failed to load detail page " + detailPageUrl + ": " + e);
        return "";
    }
}