使用Jsoup和谷歌应用引擎

时间:2016-02-01 02:12:46

标签: java google-app-engine jsoup

我刚开始使用谷歌应用引擎(Google App Engine),还是个初学者。我创建了一个 Web 应用程序,并添加了 Jsoup 库。我试图从一个网站上解析大量数据,但是在部署应用程序之后,访问时收到了下面这个错误:

  

错误:服务器错误
  服务器遇到错误,无法完成您的请求。   请在30秒后重试。

这是我的代码:

/**
 * Scrapes the paginated listing that starts at the {@code url} field, parses every
 * restaurant page it links to, and writes the result to the response as
 * <code>{"Restaurant":[ ... ]}</code>.
 *
 * <p>Each listing page is downloaded once: its restaurant links are collected in the
 * same pass that checks for a "next page" marker. The last listing page (the one
 * without the marker) is included as well.
 *
 * <p>NOTE(review): the work lists and the scratch {@code doc}/{@code restaurants}
 * values are instance fields declared outside this method; if the container shares
 * one servlet instance between concurrent requests they will race — confirm and
 * consider making them locals.
 *
 * <p>NOTE(review): all pages are fetched sequentially with a 60 s timeout each, so the
 * total run time may exceed the platform's request deadline — verify against the
 * hosting environment's limits.
 *
 * @param req  the incoming request (not read)
 * @param resp the response the JSON text is written to
 * @throws IOException if writing to the response fails
 */
public void doGet(HttpServletRequest req, HttpServletResponse resp)
        throws IOException {
    resp.setContentType("text/plain");
    resp.getWriter().println("{\"Restaurant\":[");
    listPages = new ArrayList<>();
    listRestaurant = new ArrayList<>();
    listUrlRestaurant = new ArrayList<>();
    listObj = new ArrayList<>();

    // Work on a local copy so the start URL held in the field is not overwritten;
    // otherwise every request after the first would begin on the last page.
    String pageUrl = url;
    try {
        int pageNumber = 1;
        while (true) {
            doc = Jsoup.connect(pageUrl).userAgent("Mozilla").timeout(60000).get();
            listPages.add(pageUrl);

            // Collect the restaurant links now instead of re-downloading the page later.
            restaurants = doc.select("div.listing_img > a");
            for (Element link : restaurants) {
                listUrlRestaurant.add(link.attr("href"));
            }

            // No "next" marker: this was the last page (it has already been recorded above).
            if (!doc.select("strong.next.page-numbers").text().contains("SUIV")) {
                break;
            }

            // Assumes the listing URL contains a "page/<n>/" segment — TODO confirm.
            String nextUrl = pageUrl.replace(
                    "page/" + pageNumber + "/", "page/" + (pageNumber + 1) + "/");
            if (nextUrl.equals(pageUrl)) {
                // The page segment was not found; stop rather than refetch the same page forever.
                break;
            }
            pageUrl = nextUrl;
            pageNumber++;
        }
    } catch (IOException e) {
        // Best effort: keep whatever was collected before the failure.
        e.printStackTrace();
    }

    for (int i = 0; i < listUrlRestaurant.size(); i++) {
        ParsingRestaurant(listUrlRestaurant.get(i), resp, doc3, listObj);
    }

    // Separate entries with commas but leave the last one bare so the array stays valid JSON.
    int lastIndex = listObj.size() - 1;
    for (int i = 0; i <= lastIndex; i++) {
        if (i != lastIndex) {
            resp.getWriter().println(listObj.get(i) + ",");
        } else {
            resp.getWriter().println(listObj.get(i));
        }
    }
    resp.getWriter().println("]}");
}
/**
 * Downloads one restaurant page, extracts its fields into a {@code Restaurant},
 * serializes it with Gson (pretty-printed) and appends the JSON text to {@code listObj}.
 *
 * <p>Any single-valued field whose element is missing from the page is stored as the
 * placeholder {@code "empty"}. If the page cannot be downloaded the stack trace is
 * printed and nothing is appended.
 *
 * <p>NOTE(review): the scratch elements ({@code name}, {@code adress}, {@code facebook},
 * {@code phone}, {@code time}, {@code map}, {@code images}, {@code details}) are fields
 * declared outside this method; confirm they are not shared between concurrent requests.
 *
 * @param url     address of the restaurant page to fetch
 * @param resp    unused; kept for signature compatibility
 * @param doc     ignored on entry; overwritten with the fetched document
 * @param listObj receives the JSON text of the parsed restaurant
 */
private void ParsingRestaurant(String url, HttpServletResponse resp, Document doc,List<String> listObj) {
    Gson gson = new GsonBuilder().setPrettyPrinting().create();

    Restaurant obj = new Restaurant();
    try {
        doc = Jsoup.connect(url).userAgent("Mozilla").timeout(60000).get();

        // first() yields null when nothing matches, so every lookup is guarded.
        name = doc.select("h1.entry-title").first();
        obj.setName(name == null ? "empty" : name.text());

        adress = doc.select("span#frontend_address").first();
        obj.setAdress(adress == null ? "empty" : adress.text());

        facebook = doc.select("a#facebook").first();
        obj.setFacebook(facebook == null ? "empty" : facebook.attr("href"));

        phone = doc.select("span.entry-phone.frontend_phone.listing_custom").first();
        obj.setPhone(phone == null ? "empty" : phone.text());

        time = doc.select("span.entry-listing_timing.frontend_listing_timing.listing_custom").first();
        // A missing timing element must set the time, not overwrite the phone.
        obj.setTime(time == null ? "empty" : time.text());

        // The directions link carries the coordinates as a comma-separated pair after the prefix.
        // NOTE(review): the first value is stored as longitude and the second as latitude,
        // as in the original code — confirm the order used by the site.
        String longitude = "empty";
        String latitude = "empty";
        map = doc.select("div.google-map-directory > a ").first();
        if (map != null) {
            String location = map.attr("href")
                    .replace("http://www.google.com/maps/dir/Current+Location/", "");
            String[] output = location.split(",");
            if (output.length >= 2) {
                longitude = output[0];
                latitude = output[1];
            }
        }
        obj.setLongitude(longitude);
        obj.setLatitude(latitude);

        images = doc.select("a.listing_img.galerie_listing");
        for (Element image : images) {
            obj.images.add(image.attr("href"));
        }

        details = doc.select("div#listing_apercu > div");
        for (Element detail : details) {
            obj.titles.add(detail.select("label").text());

            // Overwrite the character just before each upper-case letter (from index 1 on)
            // with a comma, editing one array in place instead of copying per replacement.
            char[] chars = detail.select("p > span").text().toCharArray();
            for (int j = 1; j < chars.length; j++) {
                if (Character.isUpperCase(chars[j])) {
                    chars[j - 1] = ',';
                }
            }
            obj.details.add(new String(chars));
        }

        listObj.add(gson.toJson(obj));
    } catch (IOException e) {
        // Best effort: a restaurant whose page cannot be fetched is skipped.
        e.printStackTrace();
    }
}
/**
 * Returns a copy of {@code str} in which the character at index {@code position}
 * has been replaced by {@code ch}. The input string itself is not modified.
 *
 * @param position zero-based index of the character to replace
 * @param ch       replacement character
 * @param str      source string
 * @return a new string equal to {@code str} except at {@code position}
 */
public String changeCharInPosition(int position, char ch, String str){
    final char[] buffer = str.toCharArray();
    buffer[position] = ch;
    return String.valueOf(buffer);
}
}

对这个问题有什么想法吗?

0 个答案:

没有答案