我正在尝试使用 for...of 循环来迭代一个 URL 数组,并对其中每个 URL 调用 Puppeteer 的 page.evaluate() 函数。下面贴出我的代码片段(单个 URL 的版本),希望有人能向我解释如何把这段代码改写成循环。我想定义一个 URL 数组(也许定义在 JSON 文件中),这样每次想在不同的学校科目之间切换时,就可以直接使用该数组,而不必手动替换第二个 URL。
const puppeteer = require('puppeteer');
/**
 * Logs in to the electronic register and fetches the assignments of one
 * subject.
 *
 * The browser is closed in a `finally` block so that a failing navigation,
 * selector or evaluation step does not leave the browser running.
 *
 * @returns {Promise<string>} innerHTML of the first `<pre>` element found on
 *   the assignments page.
 */
const scrape = async () => {
  const browser = await puppeteer.launch({headless: true});

  try {
    const page = await browser.newPage();

    // NOTE(review): page.waitFor() is deprecated in recent Puppeteer
    // releases — confirm against the installed version before upgrading.
    await page.goto('https://marconi-pr.registroelettronico.com/quaderno/'); // go to login page
    await page.waitFor(2000);
    await page.click('#ext-container-6 > div.x-size-monitors.scroll'); // skip some ads
    await page.waitFor(2000);
    await page.type('#ext-element-21', 'user'); // user field
    await page.waitFor(1000);
    await page.type('#ext-element-27', 'password'); // pwd field
    await page.click('#btnAccedi'); // login button
    await page.waitFor(5000);

    // After login, open the assignments endpoint for the chosen subject.
    await page.goto('https://marconi-pr.registroelettronico.com/mastercom/register_manager.php?_dc=1537450418307&id_materia=1000188&action=get_assignments_subject&page=1&start=0&limit=25');

    // Copy the raw content of the <pre> element out of the page context.
    const html = await page.evaluate(el => el.innerHTML, await page.$('pre'));
    return html;
  } finally {
    // Runs on success and on failure alike.
    await browser.close();
  }
};
// Run the scraper and print the parsed result. Failures are reported and
// reflected in the exit code instead of being left as an unhandled rejection.
scrape()
  .then((html) => {
    const parsed = JSON.parse(html); // parsing as json
    console.log(parsed); // Success!
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
答案 0 :(得分:1)
登录之后,可以按照下面的方法,依次访问 URL 数组中的每个页面并调用 page.evaluate():
'use strict';
const puppeteer = require( 'puppeteer' );
// Pages visited after the login step. Frozen so the list cannot be mutated
// by accident while it is being iterated.
const urls = Object.freeze( [
    'https://www.example.com/page-1',
    'https://www.example.com/page-2',
    'https://www.example.com/page-3',
] );
// Logs in once, then visits every entry of `urls` in order with the same
// page and prints the innerHTML of the element with id "example".
( async () =>
{
    const browser = await puppeteer.launch( { headless : true } );

    try
    {
        const page = await browser.newPage();

        await page.goto( 'https://www.example.com/login' );

        await page.type( '#username', 'username' );
        await page.type( '#password', 'password' );

        // Start waiting for the navigation before clicking, so a fast
        // navigation cannot finish before the wait has been registered.
        await Promise.all( [
            page.waitForNavigation(),
            page.click( '#submit' ),
        ] );

        // Sequential on purpose: a single page is reused for every URL.
        for ( const url of urls )
        {
            await page.goto( url );

            const result = await page.evaluate( () => document.getElementById( 'example' ).innerHTML );

            console.log( result );
        }
    }
    finally
    {
        // Close the browser on success and on failure alike.
        await browser.close();
    }
})().catch( ( error ) =>
{
    console.error( error );
    process.exitCode = 1;
} );