Casperjs显示Phantomjs崩溃

时间:2015-02-17 04:15:00

标签: javascript phantomjs casperjs

我在使用 CasperJS 时遇到了一个比较严重的问题。我尝试从网站上抓取 640 条数据。第一步是获取数据总数(即 640),之后遍历所有抓取到的数据,并通过 sendAJAX(同步)存入数据库。这是我的代码:

// Schedules a CasperJS step that posts the scraped results for city_code to the
// local web service (hotel count first, then every hotel and its rooms) and
// finally echoes the elapsed time.
// NOTE(review): this try/catch only wraps the casper.then() call itself; it will
// presumably not see errors thrown later inside the step callback - confirm
// against how CasperJS runs queued steps.
try{
    casper.then(function(){
        if(total_page > 0){
            var total_hotel = 0;

            //this.echo("Hit here");

            // Add up the number of hotels collected on every scraped page.
            for(var j=0; j<listHotelPage.length; j++){
                total_hotel += listHotelPage[j].listHotelObj.length;
            }

            //save hotels amount on related city
            // total_hotel and city_code are handed to the callback as evaluate()
            // arguments. The callback returns nothing, and `save` is not read
            // again anywhere in this snippet.
            var save = this.evaluate(function(total_hotel, city_code){
                var wsurl       = "http://localhost:9000/Cities/saveHotelNum";
                var data        = new Object();
                data.cityCode   = city_code;
                data.hotelNum   = total_hotel;
                // 4th argument (async) is false, so the POST is made synchronously.
                __utils__.sendAJAX(wsurl, "POST" , data, false, { contentType: "application/x-www-form-urlencoded" });
            }, total_hotel, city_code);

            this.echo("Saved Hotel With Amount = "+total_hotel);

            //save all hotels found on related city
            //this.echo("Insert 1");

            // The whole listHotelPage structure (every page, every hotel including
            // its base64img, every room) is passed into this single evaluate()
            // call, and all looping/posting happens inside the callback.
            this.evaluate(function(listHotelPage, city_code){
                var save_hotel_url  = "http://localhost:9000/Hotels/saveHotelRest";
                var data            = new Object();

                window.__utils__.echo("List Hotel Page Length :"+listHotelPage.length);
                window.__utils__.echo("City Code :"+city_code);

                // i walks the scraped pages, j the hotels on a page, k the rooms of a hotel.
                for(var i=0; i<listHotelPage.length; i++){
                    window.__utils__.echo("Iteration #"+i);
                    for(var j=0; j<listHotelPage[i].listHotelObj.length; j++){
                        data                = new Object();
                        data.hotelCode      = listHotelPage[i].listHotelObj[j].hotel_code;
                        data.hotelName      = listHotelPage[i].listHotelObj[j].hotel_name;
                        data.hotelAddress   = listHotelPage[i].listHotelObj[j].hotel_address;
                        data.photo          = listHotelPage[i].listHotelObj[j].base64img;
                        data.hotelStar      = listHotelPage[i].listHotelObj[j].star_score;
                        data.cityCode       = city_code;

                        // Synchronous POST of the hotel record before its rooms are sent.
                        __utils__.sendAJAX(save_hotel_url, "POST" , data, false, { contentType: "application/x-www-form-urlencoded" });

                        var room_length     = listHotelPage[i].listHotelObj[j].listDescriptionObj.length;
                        var room_data       = new Object();
                        var save_room_url   = "http://localhost:9000/Rooms/saveRoomRest";

                        //window.__utils__.echo("Save 2.5");

                        // One synchronous POST per room, keyed by the hotel code just sent.
                        for(var k=0; k<room_length; k++){
                            room_data = new Object();
                            room_data.hotelCode     = data.hotelCode;
                            room_data.categoryName  = listHotelPage[i].listHotelObj[j].listDescriptionObj[k].room_category;
                            room_data.roomService   = listHotelPage[i].listHotelObj[j].listDescriptionObj[k].room_service;
                            room_data.roomPrice     = listHotelPage[i].listHotelObj[j].listDescriptionObj[k].room_price;
                            room_data.available     = listHotelPage[i].listHotelObj[j].listDescriptionObj[k].available;
                            room_data.currencyCode  = listHotelPage[i].listHotelObj[j].listDescriptionObj[k].currencyCode;
                            //window.__utils__.echo("Save 3");
                            __utils__.sendAJAX(save_room_url, "POST" , room_data, false, { contentType: "application/x-www-form-urlencoded" });
                        }

                        // Logs the page index i (not the hotel index j) after each hotel.
                        window.__utils__.echo("Save #"+i);
                    }
                }

            }, listHotelPage, city_code);   

        }
        else if(total_page == null || total_page <= 0 || total_page == ""){
            // No result pages: store -1 as the hotel count for this city.
            // total_hotel is only assigned in the branch above, so it is undefined
            // here; the callback receives it but never uses it.
            this.evaluate(function(total_hotel, city_code){
                var wsurl       = "http://localhost:9000/Cities/saveHotelNum";
                var data        = new Object();
                data.cityCode   = city_code;
                data.hotelNum   = -1;
                __utils__.sendAJAX(wsurl, "POST" , data, false, { contentType: "application/x-www-form-urlencoded" });
            }, total_hotel, city_code);

        }

        // time1 is not set anywhere in this snippet; presumably it is recorded
        // when scraping starts - confirm.
        time2 = new Date();
        var diff = Math.abs(time1 - time2);
        this.echo("Execution time :"+diff+" ms");
    });

    // NOTE(review): this increment sits outside the step callback, so it presumably
    // runs when the step is queued rather than when it executes - confirm.
    date_counter++;
}
catch(e){
    // The caught error object is not logged; only a fixed marker is echoed.
    casper.then(function(){
        this.echo("Error 12");
    });
}

可疑原因在于此部分:

this.evaluate(function(listHotelPage, city_code){}

我尝试把 evaluate 回调内部的代码全部注释掉,但崩溃依然发生……

但是当我把这一整段(整个 evaluate 调用)都注释掉时,脚本运行得很正常,崩溃消失了……

另外,如果我换成另一个只回显文本的 evaluate(用它替换前面提到的 evaluate 代码),如下所示:

// Stripped-down replacement used for diagnosis: the callback receives
// listHotelPage as its only argument and merely echoes how many pages it holds.
this.evaluate(function(pages){
    var pageCount = pages.length;
    window.__utils__.echo("List Hotel Page Length :"+pageCount);
}, listHotelPage);

它仍然会导致崩溃。

我仍然弄不清楚为什么会这样。我认为这可能与使用 evaluate 有关。但为什么调用 evaluate 会导致这样的崩溃?

有什么建议吗?

这是填充listHotelPage的方式:

// Scrapes every hotel row under div#dResult on the current result page, builds one
// hotelObj (with its room descriptions) per row, appends the page's hotels to
// listHotelPage and, unless this was the last page, clicks the element whose id is
// "pg-top-cnt-<page_iterator+1>" (presumably the pager link for the next page).
// NOTE(review): as in the original, most names are assigned without `var`; they
// are presumably declared in an enclosing scope - confirm.
hotel_number    = this.evaluate(function(){ return document.querySelectorAll("div#dResult > div").length; });

var hotel_count = parseInt(hotel_number, 10);

listHotelObj = [];
for(var i=1; i<=hotel_count; i++){
    // XPath of the i-th hotel's text column; the field lookups below share it.
    var info_xpath  = "//*[@id='dResult']/div["+i+"]/div/div[2]/div/div[2]";

    category_len    = this.evaluate(function(i){ return document.querySelectorAll("div#dResult > div:nth-child("+i+") > div > div:nth-child(4) > div > div:nth-child(1) > span > div").length; }, i);
    div_hotel_id    = this.evaluate(function(i){ return document.querySelector("div#dResult > div:nth-child("+i+")").id; }, i);
    // The hotel code is the row's element id with the "Display" part removed.
    hotel_code      = div_hotel_id.replace("Display", "");
    hotel_name      = this.fetchText(x(info_xpath+"/strong/a[1]"));
    hotel_address   = this.fetchText(x(info_xpath+"/em/label"));
    hotel_style     = this.fetchText(x(info_xpath+"/div[1]/span"));
    hotel_location  = this.fetchText(x(info_xpath+"/div[2]/span"));
    star_amount     = this.evaluate(function(i){ return document.querySelectorAll("#dResult > div:nth-child("+i+") > div > div:nth-child(2) > div > div:nth-child(3) > div > label > img").length; }, i);
    photo_url       = this.evaluate(function(i){ return document.querySelector("#dResult > div:nth-child("+i+") > div > div:nth-child(2) > div > div:nth-child(1) > a > img").src; }, i);
    base64img       = this.base64encode(photo_url);
    star_score      = 0;

    // Not used anywhere within this snippet.
    var detail_url      = "http://www.mgholiday.com/b2b/Accom/HotelDescription.php?Code="+hotel_code;

    listDescriptionObj = [];

    for(var rows=1; rows <= category_len; rows++){
        // XPath of the rows-th room line of the i-th hotel.
        var room_xpath  = "//*[@id='dResult']/div["+i+"]/div/div[4]/div/div[1]/span/div["+rows+"]";

        room_category   = this.fetchText(x(room_xpath+"/div[1]/span"));
        room_service    = this.fetchText(x(room_xpath+"/div[2]/span"));
        room_price      = this.fetchText(x(room_xpath+"/div[3]/span"));
        image_src       = this.evaluate(function(rows, i){
            return jQuery("div#dResult > div:nth-child("+i+") > div > div:nth-child(4) > div > div:nth-child(1) > span > div:nth-child("+rows+") > div:nth-child(4) > span > img")[0].src;
        }, rows, i);
        url_img_arr     = image_src.split("/");
        status_img      = url_img_arr[url_img_arr.length-1];

        // The room counts as available only when its status icon is btnAV-v3.gif.
        available = (status_img == "btnAV-v3.gif") ? 1 : 0;

        // The price text is split on a space: first part is the amount (with
        // comma separators), second part is the currency code.
        var price_part      = room_price.split(" ");
        var currencyCode    = price_part[1];

        // Remove the comma separators and convert the whole amount. The previous
        // code kept only the first comma group and multiplied it by 1000 for each
        // remaining group, so "1,250" was stored as 1000.
        var amount_text     = price_part[0].replace(/,/g, "");
        var pure_price      = amount_text; // unparseable or empty text is kept as-is
        if(amount_text !== "" && !isNaN(Number(amount_text))){
            pure_price = Number(amount_text);
        }

        descriptionObj                  = {};
        descriptionObj.room_category    = room_category;
        descriptionObj.room_service     = room_service;
        descriptionObj.room_price       = pure_price;
        descriptionObj.available        = available;
        descriptionObj.currencyCode     = currencyCode;

        listDescriptionObj.push(descriptionObj);
    }

    // Derive the star rating from the file name of each star icon.
    for(var rows=1; rows <= star_amount; rows++){
        var star_url            = this.evaluate(function(i, rows){ return document.querySelector("#dResult > div:nth-child("+i+") > div > div:nth-child(2) > div > div:nth-child(3) > div > label > img:nth-child("+rows+")").src; }, i, rows);
        var star_name_array     = star_url.split("/");
        var star_name           = star_name_array[star_name_array.length-1];

        if(star_name == "star1.gif"){
            star_score += 1;
        }
        else if(star_name == "starh.gif"){
            star_score += 0.5;
        }
        // "star0.gif" (and any other file name) contributes 0.
    }

    hotelObj                    = {};
    hotelObj.hotel_code         = hotel_code;
    hotelObj.hotel_name         = hotel_name;
    hotelObj.base64img          = base64img;
    hotelObj.star_score         = star_score;
    hotelObj.hotel_address      = hotel_address;
    hotelObj.hotel_location     = hotel_location;
    hotelObj.hotel_style        = hotel_style;
    // NOTE(review): phone_number, fax_number and total_room are not assigned
    // anywhere in this snippet; presumably they are set by code not shown here -
    // confirm, otherwise these reads throw a ReferenceError.
    hotelObj.phone_number       = phone_number;
    hotelObj.fax_number         = fax_number;
    hotelObj.total_room         = total_room;

    hotelObj.listDescriptionObj = listDescriptionObj;

    listHotelObj.push(hotelObj);

}

// Record this page's hotels under a 1-based page number.
pageResultObj               = {};
pageResultObj.page_num      = (page_iterator+1);
pageResultObj.listHotelObj  = listHotelObj;

listHotelPage.push(pageResultObj);

// Only click onward while there is still a later page to visit.
if(page_iterator < total_page-1){
    this.click(x("//*[@id='pg-top-cnt-"+(page_iterator+1)+"']"));
}
page_iterator++;

1 个答案:

答案 0 :(得分:0)

我找到了解决方案。它与我的 listHotelPage 的填充方式无关。问题在于:当我在 evaluate 函数中运行大量 JS 代码时会出错(PhantomJS 崩溃),因为 evaluate 无法执行太多代码或承载太多数据。我曾尝试把 listHotelPage 传进去并在其中对它进行逻辑处理,结果一样,还是崩溃。不过,如果确实需要 listHotelPage 的填充代码,如下所示……

    // Scrapes every hotel row under div#dResult on the current result page, builds
    // one hotelObj (with its room descriptions) per row, appends the page's hotels
    // to listHotelPage and, unless this was the last page, clicks the element whose
    // id is "pg-top-cnt-<page_iterator+1>" (presumably the pager link for the next page).
    // NOTE(review): as in the original, most names are assigned without `var`; they
    // are presumably declared in an enclosing scope - confirm.
    hotel_number    = this.evaluate(function(){ return document.querySelectorAll("div#dResult > div").length; });

var hotel_count = parseInt(hotel_number, 10);

listHotelObj = [];
for(var i=1; i<=hotel_count; i++){
    // XPath of the i-th hotel's text column; the field lookups below share it.
    var info_xpath  = "//*[@id='dResult']/div["+i+"]/div/div[2]/div/div[2]";

    category_len    = this.evaluate(function(i){ return document.querySelectorAll("div#dResult > div:nth-child("+i+") > div > div:nth-child(4) > div > div:nth-child(1) > span > div").length; }, i);
    div_hotel_id    = this.evaluate(function(i){ return document.querySelector("div#dResult > div:nth-child("+i+")").id; }, i);
    // The hotel code is the row's element id with the "Display" part removed.
    hotel_code      = div_hotel_id.replace("Display", "");
    hotel_name      = this.fetchText(x(info_xpath+"/strong/a[1]"));
    hotel_address   = this.fetchText(x(info_xpath+"/em/label"));
    hotel_style     = this.fetchText(x(info_xpath+"/div[1]/span"));
    hotel_location  = this.fetchText(x(info_xpath+"/div[2]/span"));
    star_amount     = this.evaluate(function(i){ return document.querySelectorAll("#dResult > div:nth-child("+i+") > div > div:nth-child(2) > div > div:nth-child(3) > div > label > img").length; }, i);
    photo_url       = this.evaluate(function(i){ return document.querySelector("#dResult > div:nth-child("+i+") > div > div:nth-child(2) > div > div:nth-child(1) > a > img").src; }, i);
    base64img       = this.base64encode(photo_url);
    star_score      = 0;

    // Not used anywhere within this snippet.
    var detail_url      = "http://www.mgholiday.com/b2b/Accom/HotelDescription.php?Code="+hotel_code;

    listDescriptionObj = [];

    for(var rows=1; rows <= category_len; rows++){
        // XPath of the rows-th room line of the i-th hotel.
        var room_xpath  = "//*[@id='dResult']/div["+i+"]/div/div[4]/div/div[1]/span/div["+rows+"]";

        room_category   = this.fetchText(x(room_xpath+"/div[1]/span"));
        room_service    = this.fetchText(x(room_xpath+"/div[2]/span"));
        room_price      = this.fetchText(x(room_xpath+"/div[3]/span"));
        image_src       = this.evaluate(function(rows, i){
            return jQuery("div#dResult > div:nth-child("+i+") > div > div:nth-child(4) > div > div:nth-child(1) > span > div:nth-child("+rows+") > div:nth-child(4) > span > img")[0].src;
        }, rows, i);
        url_img_arr     = image_src.split("/");
        status_img      = url_img_arr[url_img_arr.length-1];

        // The room counts as available only when its status icon is btnAV-v3.gif.
        available = (status_img == "btnAV-v3.gif") ? 1 : 0;

        // The price text is split on a space: first part is the amount (with
        // comma separators), second part is the currency code.
        var price_part      = room_price.split(" ");
        var currencyCode    = price_part[1];

        // Remove the comma separators and convert the whole amount. The previous
        // code kept only the first comma group and multiplied it by 1000 for each
        // remaining group, so "1,250" was stored as 1000.
        var amount_text     = price_part[0].replace(/,/g, "");
        var pure_price      = amount_text; // unparseable or empty text is kept as-is
        if(amount_text !== "" && !isNaN(Number(amount_text))){
            pure_price = Number(amount_text);
        }

        descriptionObj                  = {};
        descriptionObj.room_category    = room_category;
        descriptionObj.room_service     = room_service;
        descriptionObj.room_price       = pure_price;
        descriptionObj.available        = available;
        descriptionObj.currencyCode     = currencyCode;

        listDescriptionObj.push(descriptionObj);
    }

    // Derive the star rating from the file name of each star icon.
    for(var rows=1; rows <= star_amount; rows++){
        var star_url            = this.evaluate(function(i, rows){ return document.querySelector("#dResult > div:nth-child("+i+") > div > div:nth-child(2) > div > div:nth-child(3) > div > label > img:nth-child("+rows+")").src; }, i, rows);
        var star_name_array     = star_url.split("/");
        var star_name           = star_name_array[star_name_array.length-1];

        if(star_name == "star1.gif"){
            star_score += 1;
        }
        else if(star_name == "starh.gif"){
            star_score += 0.5;
        }
        // "star0.gif" (and any other file name) contributes 0.
    }

    hotelObj                    = {};
    hotelObj.hotel_code         = hotel_code;
    hotelObj.hotel_name         = hotel_name;
    hotelObj.base64img          = base64img;
    hotelObj.star_score         = star_score;
    hotelObj.hotel_address      = hotel_address;
    hotelObj.hotel_location     = hotel_location;
    hotelObj.hotel_style        = hotel_style;
    // NOTE(review): phone_number, fax_number and total_room are not assigned
    // anywhere in this snippet; presumably they are set by code not shown here -
    // confirm, otherwise these reads throw a ReferenceError.
    hotelObj.phone_number       = phone_number;
    hotelObj.fax_number         = fax_number;
    hotelObj.total_room         = total_room;

    hotelObj.listDescriptionObj = listDescriptionObj;

    listHotelObj.push(hotelObj);

}

// Record this page's hotels under a 1-based page number.
pageResultObj               = {};
pageResultObj.page_num      = (page_iterator+1);
pageResultObj.listHotelObj  = listHotelObj;

listHotelPage.push(pageResultObj);

// Only click onward while there is still a later page to visit.
if(page_iterator < total_page-1){
    this.click(x("//*[@id='pg-top-cnt-"+(page_iterator+1)+"']"));
}
page_iterator++;

抱歉我的打字丢失了......

下面是我的做法……

我把代码从 evaluate 方法中提取了出来……就像这样:

// Posts every scraped hotel, followed by each of its rooms, to the local web
// service. The loops run on the CasperJS side and each evaluate() call receives
// only one URL and one record, instead of the whole listHotelPage structure.
// Both endpoints are defined here: the original snippet used save_hotel_url
// without defining it and re-declared save_room_url for every hotel.
var save_hotel_url  = "http://localhost:9000/Hotels/saveHotelRest";
var save_room_url   = "http://localhost:9000/Rooms/saveRoomRest";

for(var i=0; i<listHotelPage.length; i++){
    var hotels = listHotelPage[i].listHotelObj;

    for(var j=0; j<hotels.length; j++){
        var hotel = hotels[j];

        // Declared with var so the record no longer leaks out as an implicit global.
        var data = {
            hotelCode:      hotel.hotel_code,
            hotelName:      hotel.hotel_name,
            hotelAddress:   hotel.hotel_address,
            photo:          hotel.base64img,
            hotelStar:      hotel.star_score,
            cityCode:       city_code
        };

        // async flag is false: the hotel is posted synchronously before its rooms.
        this.evaluate(function(save_hotel_url, data){
            __utils__.sendAJAX(save_hotel_url, "POST" , data, false, { contentType: "application/x-www-form-urlencoded" });
        }, save_hotel_url, data);

        var rooms = hotel.listDescriptionObj;

        for(var k=0; k<rooms.length; k++){
            var room_data = {
                hotelCode:      data.hotelCode,
                categoryName:   rooms[k].room_category,
                roomService:    rooms[k].room_service,
                roomPrice:      rooms[k].room_price,
                available:      rooms[k].available,
                currencyCode:   rooms[k].currencyCode
            };

            this.evaluate(function(save_room_url, room_data){
                __utils__.sendAJAX(save_room_url, "POST" , room_data, false, { contentType: "application/x-www-form-urlencoded" });
            }, save_room_url, room_data);
        }
    }
}

正如您所看到的,我把 for 循环移到了 evaluate 函数之外,这样每次 evaluate 需要执行的代码和接收的数据都少得多。在最后这段代码中,this.evaluate 函数只执行 __utils__.sendAJAX(save_room_url, "POST" , room_data, false, { contentType: "application/x-www-form-urlencoded" }); 这一次调用;而在之前的示例中,所有迭代代码和全部抓取到的数据都是在 this.evaluate() 中处理的。