我正在尝试编写一个通用脚本,用于在处理完若干中间页面之后抓取目标页面的数据。
有多种类型的中间页面需要处理。
处理中间页面需要用户交互,例如登录,单击等。
我可以根据URL模式验证该页面是否为中间页面。我的配置告诉我如何处理中间页。
中间页可以按任何顺序排列,并且可能需要任何数量的交互才能到达最后一页。
/**
 * Opens `url` in a new Puppeteer browser, works through any intermediate
 * pages (as identified by `getIntermediatePage`) and logs the HTML of the
 * page reached once no further intermediate page is detected.
 *
 * For every intermediate page the configured fields are filled in (depending
 * on `type`) and `element` is clicked, after which the resulting navigation is
 * awaited before the new URL is inspected again.
 *
 * @param {string} url - Address to open first.
 * @param {Array<object>} [cookies] - Cookies passed to `page.setCookie`;
 *   skipped when `null`/`undefined`.
 * @param {number} timeout - Timeout in milliseconds used for the initial
 *   load, selector waits and every follow-up navigation.
 * @returns {Promise<object|undefined>} The response of the most recent
 *   navigation that produced one (the initial `goto` response when no
 *   intermediate page was handled), or `undefined` when an error was caught
 *   and logged.
 */
let getPageResponse = async function (url, cookies, timeout) {
  const navigationOptions = { timeout, waitUntil: ['load', 'networkidle0'] };
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    const page = await browser.newPage();

    // Spreading null/undefined throws, so only forward cookies when provided.
    if (cookies != null) {
      await page.setCookie(...cookies);
    }

    // Wait for the field to be present before typing so we never interact
    // with a document that is still being replaced by a navigation.
    const fillField = async (selector, text) => {
      await page.waitForSelector(selector, { timeout });
      await page.type(selector, text);
    };

    let response = await page.goto(url, navigationOptions);
    // Based on config, get all required data for the intermediate page (if any).
    let intermediatePage = getIntermediatePage(page.url());

    while (intermediatePage) {
      console.log(`<<<< Processing intermediate page starting with.. ${intermediatePage.url}`);
      // Check type of intermediate page and handle accordingly.
      switch (intermediatePage.type) {
        case 'login':
          await fillField(intermediatePage.username, secrets[0].id);
          await fillField(intermediatePage.password, secrets[0].password);
          break;
        case 'click':
          console.log('<<<< in click');
          break;
        case 'enter':
          console.log('<<<< in enter');
          await fillField(intermediatePage.username, secrets[0].id);
          break;
        default:
          break;
      }

      // All intermediate pages need a click and then they can navigate to
      // another page / the final required page. The navigation wait is
      // registered before the click so the navigation cannot be missed.
      await page.waitForSelector(intermediatePage.element, { timeout });
      const [navigationResponse] = await Promise.all([
        page.waitForNavigation(navigationOptions),
        page.click(intermediatePage.element),
      ]);
      // Keep the previous response when the navigation yielded none.
      if (navigationResponse) {
        response = navigationResponse;
      }

      intermediatePage = getIntermediatePage(page.url());
      console.log(`<<<< Page URL at the end.... ${page.url()}`);
    }

    const html = await page.content();
    console.log(html);
    return response;
  } catch (e) {
    console.log(`<<<<< ERROR${e}`);
  } finally {
    // Always release the browser, otherwise every call leaks a process.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.log(`<<<<< ERROR${closeError}`);
      }
    }
  }
};
当存在多个中间页面时,该循环无法正常工作,并报出如下错误:
Execution context was destroyed, most likely because of a navigation.
如何仍然导航到结束页面?