所以我最初编译之前我做了一些更改以允许我指定搜索和位置。如果需要,我可以把它拿出来,但我宁愿不要。这就是我得到的:
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.Scanner;
public class YelpScrapper
{
public static void main(String[] args) throws IOException
{
String description;
String location;
int pages;
Scanner keyboard = new Scanner(System.in);
System.out.print("Enter a description: ");
description = keyboard.nextLine();
System.out.print("Enter a location: ");
location = keyboard.nextLine();
System.out.print("How many pages should we scan? ");
pages = keyboard.nextInt();
String descString = "find_desc=" + description.replace(' ', '+') + "&";
String locString = "find_loc=" + location.replace(' ', '+') + "&";
int number = (pages * 10) - 10;
String url = "https://www.yelp.com/search?" + descString + locString + "start=" + number;
ArrayList<String> names = new ArrayList<String>();
ArrayList<String> address = new ArrayList<String>();
ArrayList<String> phone = new ArrayList<String>();
Document document = Jsoup.connect(url).get();
Elements nameElements = document.select(".indexed-biz-name span");
Elements addressElements = document.select(".secondary-attributes address");
Elements phoneElements = document.select(".biz-phone");
for (Element element : nameElements)
{
names.add(element.text());
}
for (Element element : addressElements)
{
address.add(element.text());
}
for (Element element : phoneElements)
{
phone.add(element.text());
}
for (int index = 0 ; index <= number ; index++)
{
System.out.println("\nLead " + index);
System.out.println("Company Name: " + names.get(index));
System.out.println("Address: " + address.get(index));
System.out.println("Phone Number: " + phone.get(index));
}
//for (String name : names)
//{
// System.out.println(name);
//}
//System.out.println("\n");
//for (String add : address)
//{
//System.out.println(add);
//}
//for (String pho : phone)
//{
//System.out.println(pho);
//}
}
}
我从BlueJ java.net.SocketTimeoutException收到的错误;读取超时(在java.net.SocketInputStream中)
我应该抛出另一个例外吗?非常感谢您提供的任何帮助!
答案 0 :(得分:0)
也许设置超时可能会有所帮助?
Document document = Jsoup.connect(url).timeout(10000).get();