我正在使用 Puppeteer,想在所有图片加载完成后对网站进行截图,但一直无法让它正常工作。
以下是我到目前为止所使用的代码,我使用https://www.digg.com作为示例网站:
const puppeteer = require('puppeteer');

/**
 * Opens https://www.digg.com/, scrolls to the bottom of the page, waits for
 * every <img> currently in the DOM to finish loading (or fail), and writes a
 * full-page screenshot to digg.png.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Set the viewport before navigating so the page is laid out only once,
    // at its final size, instead of being resized after it has loaded.
    await page.setViewport({ width: 1640, height: 800 });
    await page.goto('https://www.digg.com/');

    // Scroll to the bottom of the document.
    await page.evaluate(() => {
      window.scrollTo(0, document.body.scrollHeight);
    });

    // Fixed one-second pause after scrolling. A plain timer is used instead of
    // page.waitFor(1000) so the script does not depend on that legacy helper.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    // Wait inside the page until every <img> has settled.
    await page.evaluate(() => {
      const waitForImage = (img) =>
        new Promise((resolve) => {
          if (img.complete) {
            // Nothing left to wait for; do not attach listeners.
            resolve();
            return;
          }
          // addEventListener (rather than assigning onload/onerror) keeps the
          // page's own handlers intact. A failed image resolves as well, so a
          // single broken <img> cannot block the screenshot.
          img.addEventListener('load', () => resolve(), { once: true });
          img.addEventListener('error', () => resolve(), { once: true });
        });

      // Resolve with no value: DOM elements cannot be serialized back to Node.
      return Promise.all(
        Array.from(document.querySelectorAll('img'), (img) => waitForImage(img)),
      ).then(() => undefined);
    });

    await page.screenshot({ path: 'digg.png', fullPage: true });
  } finally {
    // Always shut the browser down, even when a step above throws.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
答案 0 :(得分:46)
有一个内置选项(built-in option):
// Resolve the navigation only once the 'networkidle0' condition is reached.
await page.goto('https://www.digg.com/', { waitUntil: 'networkidle0' });
networkidle0 - 当至少 500 毫秒内网络连接数不超过 0 个时,视为导航完成。
networkidle2 - 当至少 500 毫秒内网络连接数不超过 2 个时,视为导航完成。
P.S. 当然,如果您处理的是无限滚动的单页应用程序(如 Twitter),这种方法将不起作用。
答案 1 :(得分:10)
另一个选项是通过 evaluate 在页面中执行代码,在所有图片加载完成时得到回调。
此选项也适用于不支持 networkidle0 等待选项的 setContent:
// Wait, inside the page, until every <img> currently in the DOM has settled.
await page.evaluate(async () => {
  const images = Array.from(document.querySelectorAll('img'));
  await Promise.all(
    images.map((img) => {
      // Nothing to wait for when the browser already reports the image as complete.
      if (img.complete) return undefined;
      return new Promise((resolve) => {
        // Resolve on 'error' too: rejecting here would make Promise.all reject
        // and page.evaluate() throw because of a single broken image.
        img.addEventListener('load', resolve, { once: true });
        img.addEventListener('error', resolve, { once: true });
      });
    }),
  );
});
答案 2 :(得分:5)
您可能要考虑先使用Element.scrollIntoView()
之类的方法向下滚动以解决延迟加载图片的问题:
WARN Cannot stringify a function transformRequest 14:52:50
WARN Cannot stringify a function transformResponse 14:52:50
WARN Cannot stringify a function httpAdapter 14:52:50
WARN Cannot stringify a function validateStatus 14:52:50
WARN Cannot stringify arbitrary non-POJOs ClientRequest 14:52:50
WARN Cannot stringify a function 14:52:50
WARN Cannot stringify a function transformRequest 14:52:50
WARN Cannot stringify a function transformResponse 14:52:50
WARN Cannot stringify a function httpAdapter 14:52:50
WARN Cannot stringify a function validateStatus 14:52:50
WARN Cannot stringify arbitrary non-POJOs ClientRequest 14:52:50
WARN Cannot stringify a function 14:52:50
答案 3 :(得分:0)
我面临同样的问题。 我有一种感觉,解决方案将涉及使用:
// Enable interception so the 'request' handler below sees every request.
// (page.setRequestInterceptionEnabled was renamed to page.setRequestInterception.)
await page.setRequestInterception(true);
page.on('request', (interceptedRequest) => {
  //some code here that adds this request to ...
  //a list and checks whether all list items have ...
  //been successfully completed!

  // With interception enabled a request stalls until it is explicitly
  // continued, responded to or aborted, so always let it proceed.
  interceptedRequest.continue();
});
答案 4 :(得分:0)
我找到了一个使用 page.setViewport(...) 方法、适用于多个站点的解决方案,如下所示:
const puppeteer = require('puppeteer');

/**
 * Opens https://www.digg.com/, waits for the network to become idle, resizes
 * the viewport to the rendered document's scroll size and writes a screenshot
 * to digg-example.png.
 */
(async () => {
  const browser = await puppeteer.launch({
    headless: true, // Set to false while development
    defaultViewport: null,
    args: [
      '--no-sandbox',
      '--start-maximized', // Start in maximized state
    ],
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.digg.com/', {
      waitUntil: 'networkidle0',
      timeout: 0, // 0 disables the navigation timeout
    });

    // Get scroll width and height of the rendered page and set viewport
    const { width, height } = await page.evaluate(() => ({
      width: document.body.scrollWidth,
      height: document.body.scrollHeight,
    }));
    await page.setViewport({ width, height });

    // Fixed one-second pause after the resize. A plain timer is used instead
    // of page.waitFor(1000) so the script does not depend on that legacy helper.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    await page.screenshot({ path: 'digg-example.png' });
  } finally {
    // Always shut the browser down so the Node process can exit.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});