错误:执行上下文被破坏,最有可能是由于导航

时间:2019-10-19 14:32:16

标签: javascript node.js web-scraping automation puppeteer

我正在尝试抓取一些网站。导航如下:

  1. 打开开始页面。
  2. 一个个地单击每个URL,记录相关信息。
  3. 转到下一页,重复步骤2。
  4. 当没有其他下一个按钮可用时,关闭浏览器。

问题是当我单击列表中的第一个URL并记录了一些信息后,它通过抛出错误Error: Execution context was destroyed, most likely because of a navigation.

停止了执行

是否可以保留上一页的深层克隆并以某种方式使用它来单击下一个URL?还是有其他方法可以实现此导航?

这是我的代码:

index.js

const hatla2ee = require('./hatla2ee');

(async () => {
await hatla2ee.initialize();

let results = await hatla2ee.getListings();
debugger;   
})(); 

hatla2ee.js

const puppeteer = require("puppeteer");
const chalk = require("chalk");
var fs = require("fs");

const error = chalk.bold.red;
const success = chalk.keyword("green");

const start_url = "https://eg.hatla2ee.com/en/car"

const self = {
  browser: null,
  page: null,

  initialize: async () => {
    self.browser = await puppeteer.launch({ headless: false });
    self.page = await self.browser.newPage();
    self.pageNew = await self.browser.newPage();
    await self.page.setViewport({ width: 1440, height: 768 });

    await self.page.goto(start_url, { waitUntil: 'networkidle0' });
   },

 getListings: async () => {
    let results = [];

    do {
            let new_results = await self.getUrls();

            results = [results, ...new_results]
            console.log(results);
            let nextButton = await self.page.$x(`//div[@class='pagination pagination-right']/ul/li`);
            let nextPage = await nextButton[nextButton.length-1];
            let buttonText = (await(await nextPage.getProperty(`innerText`)).jsonValue());
            console.log(success(buttonText))
            if (buttonText.includes(`Next`)){
                await nextPage.click();
                await self.page.waitForNavigation({ waitUntil: 'networkidle0' });
            }else{
                break;
            }
        }while (true);

        return results;
},

getUrls: async () => {
    let urls = await self.page.$x(`//a[@class='NewListTitle']`);

    let results = [];

    for (let url of urls) {
        url.click()
        await self.page.waitForNavigation({ waitUntil: 'networkidle0' });
        /*let href = await (await url.getProperty(`href`)).jsonValue()
        results.push(href)
        console.log(href)
        await self.page.goto(href, { waitUntil: 'networkidle0' });
        await self.page.waitForNavigation({ waitUntil: 'networkidle0' });
        */

    }

    return results;

  }
}

module.exports = self;

0 个答案:

没有答案