如何在Node.js中使用Cheerio从多个页面(分页)抓取数据

时间:2017-07-06 06:48:49

标签: node.js web-scraping cheerio

我在从网站的多个页面(分页)中抓取数据时遇到了问题。我可以从网站的单个页面抓取数据,并且工作正常,但不知道如何从多个页面中抓取数据。下面的代码对单个页面工作正常,但我该如何从多个页面获取同样的数据?在此先表示感谢。

    var express= require('express');
    var path= require('path');
    var request= require('request');
    var cheerio= require('cheerio');
    var fs= require('fs');
    var vm= require('vm');
    var async= require('async');
    // Express application instance for this script.
    var app = express();

    // Port number intended for the HTTP server.
    var port = 1235;

    // Shared list that the scraping callbacks push their results into.
    var arr = [];

    // Listing page whose product links are followed to scrape each phone's details.
    var url= "http://www.shophive.com/prices/mobile-phones/";
    request(url, function(err, res, body){
      // Check the error before parsing `body`.
      if (err) {
        return console.error(err);
      }

      const $ = cheerio.load(body);

      // Product anchors, searched within the '.products-grid' context.
      const links = $('.product-block-inner .product-name a', '.products-grid');

      // Number of detail requests that have not called back yet; `arr` is only
      // complete (and therefore only printed) once this reaches zero.
      let pending = links.length;
      if (pending === 0) {
        console.log(arr);
        return;
      }

      links.each(function(){
        const url_links = $(this).attr('href');
        arr.push(url_links);

        getspecs(url_links, (data) => {
          // getspecs reports its fields as title / price / specifications.
          arr.push(data);
          arr.push(data.title);
          arr.push(data.price);
          arr.push(data.specifications);

          console.log("%j", data.price);
          console.log("%j", data.title);
          console.log("%j", data.specifications);

          pending -= 1;
          if (pending === 0) {
            console.log(arr);
          }
        });
      });
    });


    /**
     * Fetch one product page and extract its title, price and specification text.
     *
     * @param {string} url1 - Address of the product page to request.
     * @param {function(Object, Error=)} callback - Always invoked exactly once.
     *   On success it receives `{ title, price, specifications }` (strings taken
     *   from the page). If the request fails it receives the same shape with
     *   empty strings, plus the error as a second argument, so callers that
     *   count completions never wait forever.
     */
    var getspecs= function(url1, callback){
      request(url1, function(err, res, body){
        if (err) {
          // Report the failure instead of trying to parse the body of a failed request.
          console.error(err);
          return callback({ title: '', price: '', specifications: '' }, err);
        }

        const $ = cheerio.load(body);

        const phone_info = {
          title: $('.product-name h1').text(),
          // Only the first matching price element is used.
          price: $('span.regular-price ').first().text(),
          specifications: $('.padder table').text()
        };

        callback(phone_info);
      });
    };
     // Start the HTTP server on the configured port and report it only once
     // the server is actually listening.
     app.listen(port, function(){
       console.log('running on port ' + port);
     });

0 个答案:

没有答案