我正在尝试访问iframe的.contentWindow.document(特别是通过google在页面上投放的广告)。
如果您通过Chrome在控制台中执行此操作,它会按预期返回。例如,
// Look up the Google ad iframe by its id and take the Document of its inner window.
var t = document.getElementById('google_ads_iframe_175840252/90-min/Homepage/Index/Top_0').contentWindow.document;
// An assignment expression evaluates to its right-hand side, so the console echoes `t` here.
// NOTE(review): `t` is a Document, not a function, so this does not register a usable load handler.
window.onload=t
当通过 Puppeteer 在无头(headless)Chrome 中执行同样的操作时,返回的却是一个只包含 iframe 部分属性的奇怪 JSON。例如:
{ title:
{ location:
{ replace: {},
assign: {},
href: 'https://tpc.googlesyndication.com/safeframe/1-0-27/html/container.html',
ancestorOrigins: [Object],
origin: 'https://tpc.googlesyndication.com',
protocol: 'https:',
host: 'tpc.googlesyndication.com',
hostname: 'tpc.googlesyndication.com',
port: '',
pathname: '/safeframe/1-0-27/html/container.html',
search: '',
hash: '',
reload: {},
toString: {} },
closure_lm_292767: null,
'4CGeArbVQ': 100 } }
我刚接触 Puppeteer,任何建议都会有帮助。下面是我的 Puppeteer 代码。
const puppeteer = require('puppeteer');
/**
 * Launches Chrome through Puppeteer, opens https://www.90min.com/ and reads
 * the markup rendered inside the Google ad iframe at the top of the page.
 *
 * Values returned from `page.evaluate` must be serialisable, so the iframe's
 * document is returned as an HTML string instead of a live DOM `Document`
 * (which only survives the trip back to Node as a partial property dump).
 *
 * @returns {Promise<{title: string}|undefined>} An object whose `title`
 *   property holds the iframe document's HTML, or `undefined` when navigation
 *   or extraction failed (the error is logged to the console).
 */
const scrape = async () => {
  const browser = await puppeteer.launch({
    args: [
      '--start-maximized',
      // Lets the page script reach into the cross-origin ad iframe.
      '--disable-web-security',
    ],
    headless: false,
    // slowMo: 600, // enable to slow every operation down while debugging
    userDataDir: 'test-profile-dir',
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36');
    await page.goto('https://www.90min.com/');
    // Fixed delay kept from the original script; presumably it gives the ad
    // scripts time to inject the iframe -- TODO confirm 2s is sufficient.
    await page.waitFor(2000);

    const result = await page.evaluate(() => {
      const frameId = 'google_ads_iframe_175840252/90-min/Homepage/Index/Top_0';
      const frame = document.getElementById(frameId);
      if (!frame) {
        throw new Error(`iframe not found: ${frameId}`);
      }
      // Serialise to a string: DOM nodes cannot be transferred out of the page.
      const title = frame.contentWindow.document.documentElement.outerHTML;
      return { title };
    });
    return result;
  } catch (error) {
    // Best-effort scrape: report the failure and resolve with undefined.
    console.error(error);
    return undefined;
  } finally {
    // Always release the browser, including on failure, so the Chrome
    // process does not leak and keep the Node process alive.
    await browser.close();
  }
};
// Run the scraper and print whatever it resolved with. The rejection handler
// covers failures that escape scrape() itself (for example the browser failing
// to launch), which would otherwise surface as an unhandled promise rejection.
scrape()
  .then((value) => {
    console.log(value); // Success!
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
答案 0 :(得分:2)
我找到了答案:使用 .contentWindow.document.childNodes["0"].innerHTML;在 Puppeteer 中这样就能取回 iframe 的实际内容。