我正在尝试抓取一些网站。导航如下:
问题是当我单击列表中的第一个URL并记录了一些信息后,它通过抛出错误Error: Execution context was destroyed, most likely because of a navigation.
是否可以保留上一页的深层克隆并以某种方式使用它来单击下一个URL?还是有其他方法可以实现此导航?
这是我的代码:
index.js
const hatla2ee = require('./hatla2ee');
(async () => {
await hatla2ee.initialize();
let results = await hatla2ee.getListings();
debugger;
})();
hatla2ee.js
const puppeteer = require("puppeteer");
const chalk = require("chalk");
var fs = require("fs");
const error = chalk.bold.red;
const success = chalk.keyword("green");
const start_url = "https://eg.hatla2ee.com/en/car"
const self = {
browser: null,
page: null,
initialize: async () => {
self.browser = await puppeteer.launch({ headless: false });
self.page = await self.browser.newPage();
self.pageNew = await self.browser.newPage();
await self.page.setViewport({ width: 1440, height: 768 });
await self.page.goto(start_url, { waitUntil: 'networkidle0' });
},
getListings: async () => {
let results = [];
do {
let new_results = await self.getUrls();
results = [results, ...new_results]
console.log(results);
let nextButton = await self.page.$x(`//div[@class='pagination pagination-right']/ul/li`);
let nextPage = await nextButton[nextButton.length-1];
let buttonText = (await(await nextPage.getProperty(`innerText`)).jsonValue());
console.log(success(buttonText))
if (buttonText.includes(`Next`)){
await nextPage.click();
await self.page.waitForNavigation({ waitUntil: 'networkidle0' });
}else{
break;
}
}while (true);
return results;
},
getUrls: async () => {
let urls = await self.page.$x(`//a[@class='NewListTitle']`);
let results = [];
for (let url of urls) {
url.click()
await self.page.waitForNavigation({ waitUntil: 'networkidle0' });
/*let href = await (await url.getProperty(`href`)).jsonValue()
results.push(href)
console.log(href)
await self.page.goto(href, { waitUntil: 'networkidle0' });
await self.page.waitForNavigation({ waitUntil: 'networkidle0' });
*/
}
return results;
}
}
module.exports = self;