循环时无法点击元素

时间:2019-01-25 04:49:08

标签: web-scraping puppeteer

我试图遍历所有元素：依次单击每个元素，等待一段时间让页面加载完成，然后再收集一些信息。但是由于某种原因，每次迭代单击到的都是第一个元素。

// Runs inside the page: clicks each `.calendar-available` element in turn,
// waits 2000 ms, then collects the innerText of the `.col-md-6` elements.
// Resolves to an array holding one array of strings per clicked element.
const result = await page.evaluate(async () => {
    const data = [];
    // The element list is obtained once, before any click happens.
    // NOTE(review): the reported symptom is that every iteration clicks the
    // first element — presumably the page re-renders after a click; confirm.
    const elements = document.querySelectorAll('.calendar-available');
    for (const element of elements) {
        // NOTE: assigned without const/let, so this is an implicit global
        // in sloppy mode (and a ReferenceError in strict mode).
        data_sub = [];
        element.click();  
        // Fixed 2000 ms pause after the click before reading the columns.
        await new Promise((resolve) => setTimeout(resolve, 2000));
        let columns = document.querySelectorAll('.col-md-6');
        // Starts at index 2, so the first two matches are skipped.
        // NOTE: `i` is also assigned without a declaration.
        for(i = 2; i < columns.length; i++){
            let info = columns[i].innerText;
            data_sub.push(info);
        }         
        data.push(data_sub);
    }    
    return data;
}); 

1 个答案:

答案 0 :(得分:0)

看来您的代码本身是有效的，但每次单击之后，DOM 树都会以某种方式被重新生成，导致循环中后续的元素引用实际指向的都是同一个（第一个）元素。我们可以通过以下方式解决此问题（更改的行用注释 `// <-` 标记）：

'use strict';

const puppeteer = require('puppeteer');

/**
 * Opens the reservation page, clicks every `.calendar-available` element in
 * turn and logs the text collected after each click.
 *
 * Any error is reported through console.error. The browser is closed on both
 * the success and the failure path so no browser process is left behind.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch({ headless: false });
    try {
      const [page] = await browser.pages();

      await page.goto('https://reslife.ucla.edu/reserve');

      // The callback below is executed in the page context, so everything it
      // needs (including the pause helper) has to be defined inside it.
      const result = await page.evaluate(async () => {
        // Fixed 2000 ms pause that gives the page time to update after a click.
        const pause = () => new Promise((resolve) => setTimeout(resolve, 2000));

        document.querySelector('.reserve-grid .col-md-4 input').click();
        await pause();
        document.querySelector('.reserve-grid .col-md-6 input').click();
        await pause();

        const data = [];
        const length = document.querySelectorAll('.calendar-available').length; // <-
        for (let n = 0; n < length; n++) { // <-
          // Look the element up again on every iteration instead of reusing a
          // reference obtained before the previous click.
          const element = document.querySelectorAll('.calendar-available').item(n); // <-
          element.click();
          await pause();

          // Skip the first two `.col-md-6` matches and keep the text of the rest.
          // Declared locally so nothing leaks into the page's global scope.
          const columns = document.querySelectorAll('.col-md-6');
          const dataSub = [...columns].slice(2).map((column) => column.innerText);
          data.push(dataSub);
        }
        return data;
      });

      console.log(result);
    } finally {
      // Runs even when goto/evaluate throws; a failure of close() itself is
      // still reported by the outer catch.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();

或者，我们也可以用下面这几行来代替上面标记的行：

        // Snapshot the aria-label of every available element up front, then look
        // each element up again by its label inside the loop instead of reusing
        // the references collected here.
        // NOTE(review): assumes every aria-label is present, unique and free of
        // double quotes (it is interpolated into the selector as-is) — confirm.
        const labels = [...document.querySelectorAll('.calendar-available')].map(el => el.getAttribute('aria-label')); // <-
        for (const label of labels) { // <-
            const element = document.querySelector(`.calendar-available[aria-label="${label}"]`); // <-