我要抓取多个网址
const puppeteer = require('puppeteer');
// Question's original attempt, kept verbatim. It does not parse: the SyntaxError
// quoted below this listing is raised for the first `await` inside the callback.
let scrape = async () => {
const browser = await puppeteer.launch({headless: false,userDataDir: "./user_data"});
let elements = ['https://tr.pinterest.com/gamzeeerkek','https://tr.pinterest.com/jislaynekauany_']
// NOTE(review): `page` is used on the next line, but the only `page` declaration
// in this listing is the `let page` inside the callback body.
// The callback handed to page.evaluate is a plain (non-async) arrow function,
// so every `await` inside it is rejected by the parser.
const result = await page.evaluate(() => {
for(let url of elements)
{
let page = await browser.newPage();
await page.goto(url);
await page.waitFor(1000);
let title = document.querySelector('.lH1').innerText;
let title1 = document.getElementsByClassName('tBJ')[1].innerText;
// NOTE(review): `data` is never declared anywhere in this listing.
data.push({title, title1});
}
return data; // Return our data array
});
// NOTE(review): the promise returned by close() is not awaited here.
browser.close();
return result; // Return the data
};
// Invoke the scraper and print whatever value its promise resolves with.
scrape().then((value) => {
console.log(value); // Success!
});
我的错误是:
let page = await browser.newPage();
           ^^^^^
SyntaxError: await is only valid in async function
答案 0 :(得分:0)
您看到此错误，是因为您在 async 函数之外使用了 await。另外，由于上下文和语法方面的问题，即使您加上 async 关键字，仍然会遇到其他错误，脚本依然无法正常工作。
这是脚本:
/**
 * Opens each profile URL in a single browser tab, one after another, and
 * collects two pieces of text from every page.
 *
 * @returns {Promise<Array<{title: string, title1: string}>>} One record per
 *   URL, in the same order as the URL list.
 * @throws Rejects with whatever error the launch, navigation, selector wait or
 *   in-page evaluation produced; the browser is still closed in that case.
 */
const scrape = async () => {
  const browser = await puppeteer.launch({headless: false, userDataDir: "./user_data"});
  // try/finally guarantees the browser process is shut down even when a
  // navigation or a selector lookup fails part-way through the list.
  try {
    const data = [];
    const urls = ['https://tr.pinterest.com/gamzeeerkek', 'https://tr.pinterest.com/jislaynekauany_'];
    // One tab is reused for every URL instead of opening a new tab per page.
    const page = await browser.newPage();
    for (const url of urls) {
      await page.goto(url);
      // Wait until the title element exists before reading from the DOM.
      await page.waitForSelector('.lH1');
      // The callback runs inside the page, so it may only touch `document`
      // and must hand its result back as a plain serializable object.
      const result = await page.evaluate(() => {
        const title = document.querySelector('.lH1').innerText;
        const title1 = document.getElementsByClassName('tBJ')[1].innerText;
        return {title, title1};
      });
      data.push(result);
    }
    return data; // Return the data
  } finally {
    await browser.close();
  }
};
// Run the scraper and print the collected records. A rejection is logged and
// turned into a failing exit code instead of being left unhandled.
scrape()
  .then((value) => {
    console.log(value); // Success!
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
输出:
[{title:'Gamze Erkek',title1:'Follow'}, {title:'Jislayne',title1:'Follow'}]