我正在使用 async/await,配合无头 Chrome 和 puppeteer 库并行运行 10 个爬虫,但这段代码总是出现不稳定的超时。有时全部 10 个网页都能正常运行,有时其中 7 个网页会在 30 秒后超时。
const puppeteer = require('puppeteer')
/**
 * Launches one browser, visits every site in the list concurrently via
 * `run`, and shuts the browser down afterwards.
 *
 * @returns {Promise<Array>} The values resolved by `run`, in the same order
 *   as the site list.
 */
async function test() {
  const browser = await puppeteer.launch(); // pass {headless: false} to watch the pages
  const sites = [
    'http://bbcnews.com',
    'http://yahoo.com',
    'http://cnn.com',
    'http://quora.com',
    'http://imgur.com',
    'http://youtube.com',
    'http://azure.com',
    'http://kayak.com',
    'http://medium.com',
    'http://reddit.com',
  ];
  try {
    // Start every navigation before awaiting any of them so they run in parallel.
    const requests = sites.map((site) => run(browser, site));
    // Declared locally: assigning to an undeclared `results` would create an
    // implicit global (and throws in strict mode / ES modules).
    const results = await Promise.all(requests);
    return results;
  } finally {
    // Always release the browser; otherwise the Chrome process outlives the
    // crawl and keeps the Node process from exiting.
    await browser.close();
  }
}
/**
 * Opens a new page in `browser`, navigates it to `site`, and closes the page.
 *
 * Errors are logged rather than propagated, so a single failing site does not
 * reject the caller's aggregate promise.
 *
 * @param {object} browser - Browser handle exposing `newPage()`.
 * @param {string} site - URL to navigate to.
 * @returns {Promise<undefined|null>} `undefined` on success, `null` if any
 *   step threw.
 */
async function run(browser, site) {
  try {
    const page = await browser.newPage();
    try {
      // No navigation options are passed, so the library defaults apply.
      await page.goto(site);
    } finally {
      // Close the tab even when navigation fails or times out; leaving it
      // open leaks a page per failed site.
      await page.close();
    }
    console.log(`done${site}`);
  } catch (err) {
    console.log(err);
    return null;
  }
}
// Entry point: start the crawl and report any rejection on stderr.
test().catch((err) => {
  console.error(err);
});