我无法点击带有幻像节点的链接

时间:2017-04-01 21:43:30

标签: node.js web-scraping phantomjs nodes cheerio

我告诉你我想做什么。我正在抓tematika.com网站,以便能够为我创建一个书籍基地。问题是我需要从一页到另一页,但我不能,所以我总是得到相同的书。我认为使用代码他们会更好地理解它。

const phantom = require("phantom");
const cheerio = require("cheerio");

let _instance, _page;
let _books = [];

phantom.create().then(instance => {
 _instance = instance;
 return _instance.createPage();
}).then(page => {
 _page = page;
 return _page.open('http://www.tematika.com/catalogo/libros/arte__arquitectura_y_diseno--9.htm');
}).then(status => {
 console.log(status);
 return _page.property('content')
}).then(html => {

 // I transform the html into an object to manipulate it with cheerio
 let $ = cheerio.load(html);

 //I get all the books and pass them to an array to go through them and extract the info I'm looking for
 let books = $('.Gcentro').find('.moduleproductob').toArray();

 //As always there are 10 pages I make a for it it itere 10 times
 for(let i=0;i<10;i++){
  books.forEach(book => {
  let _book = {};
  _book.imageMin = $(book).find('.Gimagesproductos').attr('src');
  _book.linkBook = $(book).find('.FProductos').attr('href');
  _books.push(_book);
 });

 //In this part what I look for is to click on the next button on the page and it is what is not working for me
 //What ends up happening is that it never goes back to the page. So always bring me the info of the same 10 books.
_page.evaluate(function() {
 var evObj = document.createEvent('Events');
 evObj.initEvent('click', true, false);
 document.getElementById('catalogNext').dispatchEvent(evObj);
});
}
  console.log(_books);
  _page.close();
  _instance.exit();
}).catch(e => console.log(e));

我需要的是能够点击下一个并能够为我带来正在展示的书籍。如果不是幻像节点,而cheerio可以使用其他工具,我只需要在节点上工作。

谢谢! :)

0 个答案:

没有答案