用 JS 抓取带"加载更多"按钮的页面

时间:2017-12-01 10:55:01

标签: javascript web-scraping nightmare

我想抓取一个需要点击"加载更多"按钮才能显示全部内容的网站,但我无法在 Nightmare 中实现递归调用。我的代码是这样的:

// Nightmare is the browser-automation library that getThePage() drives below.
const Nightmare = require('nightmare');
// One shared Nightmare instance, used by every getThePage() call.
// NOTE(review): `show: true` presumably opens a visible browser window — confirm
// against the Nightmare documentation.
const nightmare = Nightmare({
 show:true
});
// NOTE(review): `request` is not referenced anywhere in the visible code.
const request = require('request');
// cheerio parses the HTML string that getThePage() receives from the page.
const cheerio = require('cheerio');

// NOTE(review): declared but not used — the goto() call below repeats the
// literal instead of reading this variable.
let url = 'https://www.housers.com/es/proyectos/avanzado';
// NOTE(review): never read or written in the visible code; presumably meant to
// collect the scraped entries.
let propertyArray = [];

/**
 * Opens the project listing, clicks the "#loadMore" button once, reads the
 * inner HTML of the first `.all-info` element and, from the `then` handler,
 * calls itself again.
 *
 * The function has no return statement, so callers cannot await the chain or
 * receive `result`.
 *
 * NOTE(review): this is the non-working attempt the question is about; the
 * inline notes mark the spots that most likely prevent the recursion from
 * behaving as a "click until everything is loaded" loop.
 */
var getThePage = function() {

     nightmare
      // NOTE(review): every call navigates to the listing again, which
      // presumably discards whatever earlier clicks had loaded.
      .goto('https://www.housers.com/es/proyectos/avanzado')
      // Fixed 1500 ms pause before the click.
      .wait(1500)
      .click('#loadMore')
      .evaluate(() =>{
         // Uses `document`, so this callback is expected to run inside the page.
         return document.querySelector('.all-info').innerHTML;
       })
     // NOTE(review): per the Nightmare docs `.end()` shuts the instance down, so
     // the recursive getThePage() call below would reuse a closed instance —
     // confirm.
     .end()
     .then((result) => {
        let $ = cheerio.load(result);
        let loadMore = $('#loadMore')
        // NOTE(review): `$()` returns a selection object, which is truthy even
        // when nothing matched; a `.length` check is probably what was meant.
        // The parsed HTML is only the inside of `.all-info`, which may not
        // contain the button at all — verify.
        if (loadMore) {
            getThePage();
        }
        // Nothing consumes this value because the chain is not returned.
        return result
        })
       .catch((error) => {
        console.error('Search failed:', error);
        });
        }
    // Kick off the first run.
    getThePage()

不知道用这种方法能否实现,或者大家有没有其他思路。

1 个答案:

答案 0 :(得分:0)

如果只是要抓取表格中的数据,则无需使用 Nightmare。在浏览器开发者工具的"网络"选项卡中,您会看到页面调用了这个端点:

https://www.housers.com/es/proyectos/avanzado/scroll

该端点带有分页和页面大小参数,这里每页取 200 条(不确定这是否超过了上限)。

然后只需解析返回的 HTML,并将数据放入数组:

const axios = require('axios');
const querystring = require('querystring');
const cheerio = require('cheerio');
const entities = require("entities");

// Endpoint that getPrices() passes to doRequest() for every page.
const url = 'https://www.housers.com/es/proyectos/avanzado/scroll';

// Accumulates one array of cell texts per <tr> found; filled by getPrices().
const prices = [];

/**
 * POSTs the listing filters to the endpoint and asks for one page of results.
 *
 * The page number and page size travel in the query string; the filter fields
 * are sent as a URL-encoded form body.
 *
 * @param {string} url - Endpoint to post to.
 * @param {number} page - Page number to request (getPrices() starts at 0).
 * @param {number} [size=200] - Number of entries requested per page.
 * @returns {Promise} Whatever `axios.post` returns for the request.
 */
function doRequest(url, page, size = 200) {
  // Empty strings leave the word/country/type filters unset.
  const filters = {
    word: "",
    country: "",
    type: "",
    order: "STOCK_PRICE_VARIATION",
    orderDirection: "DESC"
  };

  return axios.post(`${url}?page=${page}&size=${size}`, querystring.stringify(filters));
}

/**
 * Downloads the listing page by page and appends, for every <tr> in the
 * returned markup, an array with the trimmed text of its <td> children to the
 * module-level `prices` array.
 *
 * Pages are requested in order starting at 0; the loop ends at the first
 * response whose body is missing or blank. The collected rows and their count
 * are logged once the loop ends.
 *
 * @returns {Promise<Array<Array<string>>>} Resolves with the `prices` array.
 *   Rejects with whatever error `doRequest` or the parser raised.
 */
async function getPrices() {
  let page = 0;

  while (true) {
    //call API
    console.log(`GET page n°${page}`);
    const res = await doRequest(url, page);
    page++;

    // Check for the end marker before parsing: a blank body means there are no
    // more pages, and a missing body must not blow up on `.trim()`.
    const body = res.data == null ? '' : String(res.data);
    if (body.trim() === '') {
      break;
    }

    //parse HTML
    const $ = cheerio.load(body, {
      xmlMode: true,
      normalizeWhitespace: true,
      decodeEntities: true
    });

    //extract prices : one array of cell texts per table row
    $('tr').each((rowIndex, row) => {
      const cells = [];
      $(row).children('td').each((cellIndex, cell) => {
        cells.push(entities.decodeHTML($(cell).text().trim()));
      });
      prices.push(cells);
    });
  }

  console.log(prices);
  console.log(`total length : ${prices.length}`);
  return prices;
}

// Report failures instead of leaving the promise rejection unhandled.
getPrices().catch((error) => {
  console.error('Price download failed:', error);
});