我开始使用谷歌应用引擎,但我是初学者
我创建了一个Web应用程序,并添加了库Jsoup
我试图从网站上解析大量数据,但是当我部署应用程序时,我收到了这个错误:
错误:服务器错误
服务器遇到错误,无法完成您的请求。请在30秒后重试。
这是我的代码:
/**
 * Scrapes the paginated restaurant listing and writes the result to the
 * response as a JSON document of the form {@code {"Restaurant":[ ... ]}}.
 *
 * <p>The work is done in three passes: (1) follow the "SUIV" (next) marker to
 * collect every listing page URL, (2) fetch each listing page and collect the
 * restaurant detail links, (3) fetch each detail page and serialise it.
 *
 * <p>NOTE(review): every listing page is downloaded twice (passes 1 and 2) and
 * every restaurant page once, all synchronously inside the request. On a
 * platform with a request deadline this is a likely cause of a generic server
 * error; consider moving the crawl to a background task.
 *
 * <p>NOTE(review): the collections and documents used here are instance
 * fields, so concurrent requests on the same servlet instance would share
 * them - confirm whether that can happen in this deployment.
 *
 * @param req  the incoming request (not read)
 * @param resp the response the JSON text is written to
 * @throws IOException if the response writer cannot be obtained
 */
public void doGet(HttpServletRequest req, HttpServletResponse resp)
        throws IOException {
    resp.setContentType("text/plain");
    resp.getWriter().println("{\"Restaurant\":[");
    listPages = new ArrayList<>();
    listRestaurant = new ArrayList<>();
    listUrlRestaurant = new ArrayList<>();
    listObj = new ArrayList<>();

    // Walk the pagination on a local copy: overwriting the url field would
    // make the next request start from the last page instead of the first.
    String pageUrl = url;
    try {
        doc = Jsoup.connect(pageUrl).userAgent("Mozilla").timeout(60000).get();
        int page = 1;
        while (doc.select("strong.next.page-numbers").text().contains("SUIV")) {
            listPages.add(pageUrl);
            String nextUrl = pageUrl.replace("page/" + page + "/", "page/" + (page + 1) + "/");
            if (nextUrl.equals(pageUrl)) {
                // The URL has no "page/N/" segment to advance; stop instead of
                // re-fetching the same page forever.
                break;
            }
            pageUrl = nextUrl;
            page++;
            doc = Jsoup.connect(pageUrl).userAgent("Mozilla").timeout(60000).get();
        }
        // The final page carries no "next" marker, so the loop never records
        // it; add it here so its restaurants are not silently dropped.
        if (!listPages.contains(pageUrl)) {
            listPages.add(pageUrl);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Collect the detail-page link of every restaurant on every listing page.
    try {
        for (int i = 0; i < listPages.size(); i++) {
            doc2 = Jsoup.connect(listPages.get(i)).userAgent("Mozilla").timeout(60000).get();
            restaurants = doc2.select("div.listing_img > a");
            for (Element element : restaurants) {
                listUrlRestaurant.add(element.attr("href"));
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Parse each restaurant page; each success appends one JSON string to listObj.
    for (int i = 0; i < listUrlRestaurant.size(); i++) {
        ParsingRestaurant(listUrlRestaurant.get(i), resp, doc3, listObj);
    }

    // Emit the array elements, separated by commas but with no comma after the
    // last one (a trailing comma makes the document invalid JSON).
    int last = listObj.size() - 1;
    for (int i = 0; i < listObj.size(); i++) {
        if (i != last) {
            resp.getWriter().println(listObj.get(i) + ",");
        } else {
            resp.getWriter().println(listObj.get(i));
        }
    }
    resp.getWriter().println("]}");
}
/**
 * Downloads one restaurant detail page, extracts its fields into a
 * {@code Restaurant}, serialises it with Gson (pretty-printed) and appends
 * the JSON text to {@code listObj}.
 *
 * <p>Any field whose element is missing from the page is stored as the
 * string {@code "empty"} rather than raising a NullPointerException, so one
 * incomplete page cannot abort the whole request. If the page cannot be
 * fetched, the stack trace is printed and nothing is appended.
 *
 * @param url     address of the restaurant detail page
 * @param resp    unused; kept for signature compatibility
 * @param doc     ignored on entry; reassigned to the fetched document
 * @param listObj receives the JSON text of the parsed restaurant
 */
private void ParsingRestaurant(String url, HttpServletResponse resp, Document doc,List<String> listObj) {
    Gson gson = new GsonBuilder().setPrettyPrinting().create();
    Restaurant obj = new Restaurant();
    try {
        doc = Jsoup.connect(url).userAgent("Mozilla").timeout(60000).get();

        name = doc.select("h1.entry-title").first();
        obj.setName(name == null ? "empty" : name.text());

        adress = doc.select("span#frontend_address").first();
        obj.setAdress(adress == null ? "empty" : adress.text());

        facebook = doc.select("a#facebook").first();
        obj.setFacebook(facebook == null ? "empty" : facebook.attr("href"));

        phone = doc.select("span.entry-phone.frontend_phone.listing_custom").first();
        obj.setPhone(phone == null ? "empty" : phone.text());

        time = doc.select("span.entry-listing_timing.frontend_listing_timing.listing_custom").first();
        // Missing hours must mark the time field; writing to the phone field
        // here would overwrite a phone number that was just stored.
        obj.setTime(time == null ? "empty" : time.text());

        // The coordinates are the comma-separated tail of the directions link.
        map = doc.select("div.google-map-directory > a ").first();
        String[] output = new String[0];
        if (map != null) {
            String location = map.attr("href");
            location = location.replace("http://www.google.com/maps/dir/Current+Location/", "");
            output = location.split(",");
        }
        if (output.length >= 2) {
            // NOTE(review): the first component is stored as longitude and the
            // second as latitude - confirm this matches the order in the link.
            obj.setLongitude(output[0]);
            obj.setLatitude(output[1]);
        } else {
            obj.setLongitude("empty");
            obj.setLatitude("empty");
        }

        images = doc.select("a.listing_img.galerie_listing");
        for (Element e : images) {
            obj.images.add(e.attr("href"));
        }

        details = doc.select("div#listing_apercu > div");
        for (Element e : details) {
            obj.titles.add(e.select("label").text());
            // Replace the character just before every upper-case letter (from
            // index 1 on) with a comma. Done in one pass on a char array
            // instead of rebuilding the string for each replacement.
            char[] chars = e.select("p > span").text().toCharArray();
            for (int j = 1; j < chars.length; j++) {
                if (Character.isUpperCase(chars[j])) {
                    chars[j - 1] = ',';
                }
            }
            obj.details.add(new String(chars));
        }

        listObj.add(gson.toJson(obj));
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Returns a copy of {@code str} in which the character at index
 * {@code position} is replaced by {@code ch}.
 *
 * @param position zero-based index of the character to replace
 * @param ch       the replacement character
 * @param str      the source string; it is not modified
 * @return a new string that differs from {@code str} only at {@code position}
 * @throws ArrayIndexOutOfBoundsException if {@code position} is negative or
 *         not less than {@code str.length()}
 */
public String changeCharInPosition(int position, char ch, String str){
    final int length = str.length();
    final char[] buffer = new char[length];
    // Copy the characters into a scratch buffer, then patch the one slot.
    str.getChars(0, length, buffer, 0);
    buffer[position] = ch;
    return String.valueOf(buffer);
}
}
对这个问题有什么想法吗?