我正在尝试接受此页面弹出窗口中的 Cookie 同意选项。我尝试使用 waitForSelector，但无头浏览器似乎找不到我使用的选择器。我想把选项实际切换为“是”，然后提交表单。我猜这个弹出窗口是在 window.onload 时显示的，所以也许需要用 JavaScript 来完成？
import asyncio
import time
from pyppeteer import launch
from pyppeteer.errors import TimeoutError
from urllib.parse import urlparse
# Pages to process; the list is iterated once when the script runs.
URLS = ['https://www.trustarc.com/']

# Wall-clock reference used for the elapsed-time report printed at the end.
start = time.time()
async def fetch(url, browser):
    """Open *url* in a new page, log its requests and save a screenshot.

    Args:
        url: Address of the page to load.
        browser: Browser object on which ``newPage()`` is awaited.

    A navigation timeout is reported on stdout instead of being raised;
    any other exception propagates to the caller. The page is always
    closed before returning.
    """
    import os  # local import: only needed to prepare the output directory

    page = await browser.newPage()
    try:
        page.on('request', callback)
        await page.goto(url, {'waitUntil': 'networkidle0'})

        # The screenshot is written below ``img/``; create the directory
        # first so a fresh checkout does not fail on a missing folder.
        os.makedirs('img', exist_ok=True)
        await page.screenshot({
            'path': f'img/{urlparse(url).netloc}.png',
            'fullPage': True,
        })
    except TimeoutError:
        print(f'Timeout for: {url}')
    finally:
        await page.close()
async def callback(req):
    """Print the URL of *req*.

    Args:
        req: Any object exposing a ``url`` attribute.
    """
    print(f'Request: {req.url}')
async def run():
    """Launch a headless browser and run ``fetch`` for every entry in URLS.

    All fetches are scheduled concurrently and awaited together. The
    browser is closed even when one of the fetches raises, so a failure
    does not leave the browser running.
    """
    browser = await launch(headless=True, args=['--no-sandbox'])
    try:
        # gather() wraps each coroutine in a task, so they run concurrently.
        await asyncio.gather(*(fetch(url, browser) for url in URLS))
    finally:
        await browser.close()
# Create and install the event loop explicitly: asyncio.get_event_loop()
# with no current loop is deprecated and raises RuntimeError on recent
# Python versions. The loop is intentionally left open afterwards, as in
# the original script.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
future = asyncio.ensure_future(run(), loop=loop)
loop.run_until_complete(future)
print(f'It took {time.time()-start} seconds.')
答案 0 :(得分:1)
如果“ Cookie Preferences”(Cookie首选项)弹出框没有自动打开,则可以通过单击网页右下角的按钮来手动打开弹出窗口。
Cookie 选项位于 iframe 中，因此必须等到该框架的内容加载完毕后，才能为功能性 Cookie 和广告 Cookie 选择“是”。
提交首选项后,您需要等待并关闭确认消息才能继续使用网站。
完整示例:
// Load the site and wait until the network has gone idle.
await page.goto('https://www.trustarc.com/', { waitUntil: 'networkidle0' });

// If the Cookie Preferences overlay is not present, click the consent
// link to open it.
const overlay = await page.$('.truste_overlay');
if (overlay === null) {
    await page.click('#teconsent > a');
}

// The preference controls are looked up in the frame at index 1; wait
// until an element with class "active" is visible there.
const cookies_frame = page.frames()[1];
await cookies_frame.waitForSelector('.active', { visible: true });

// Inside the frame: click the first two elements with class "off",
// then the first element with class "submit".
await cookies_frame.evaluate(() => {
    const offToggles = document.getElementsByClassName('off');
    const submit = document.getElementsByClassName('submit')[0];
    offToggles[0].click();
    offToggles[1].click();
    submit.click();
});

// Wait for the confirmation's close button and click it.
const close_button = await cookies_frame.waitForSelector('#gwt-debug-close_id');
await close_button.click();
答案 1 :(得分:1)
如果有人觉得有用，这是我基于已采纳答案的 Python 实现：
import asyncio
import time
from pyppeteer import launch
from pyppeteer.errors import TimeoutError
from urllib.parse import urlparse
# Pages to process; the list is iterated once when the script runs.
URLS = ['https://www.trustarc.com/']

# Wall-clock reference used for the elapsed-time report printed at the end.
start = time.time()
async def fetch(url, browser):
    """Open *url*, submit the cookie preferences form and save a screenshot.

    Args:
        url: Address of the page to load.
        browser: Browser object on which ``newPage()`` is awaited.

    A timeout in any of the waits is reported on stdout instead of being
    raised; any other exception propagates to the caller. The page is
    always closed before returning.
    """
    import os  # local import: only needed to prepare the output directory

    page = await browser.newPage()
    try:
        await page.goto(url, {'waitUntil': 'networkidle0'})

        # Open the Cookie Preferences pop-up when the overlay is absent.
        if not await page.J('.truste_overlay'):
            await page.click('#teconsent > a')

        # NOTE(review): assumes the preferences iframe is the frame at
        # index 1 - confirm against the live page.
        cookies_frame = page.frames[1]
        await cookies_frame.waitForSelector('.active', {'visible': True})

        # Click the first two "off" elements, then the first "submit".
        await cookies_frame.evaluate('''() =>
{
const yes_buttons = document.getElementsByClassName( 'off' );
const submit_button = document.getElementsByClassName( 'submit' )[0];
yes_buttons[0].click();
yes_buttons[1].click();
submit_button.click();
}''')

        # Wait for the confirmation's close button and click it.
        close_button = await cookies_frame.waitForSelector('#gwt-debug-close_id')
        await close_button.click()

        # The screenshot is written below ``img/``; create the directory
        # first so a fresh checkout does not fail on a missing folder.
        os.makedirs('img', exist_ok=True)
        await page.screenshot({
            'path': f'img/{urlparse(url).netloc}.png',
            'fullPage': True,
        })
    except TimeoutError:
        print(f'Timeout for: {url}')
    finally:
        await page.close()
async def callback(req):
    """Print the URL of *req*.

    Args:
        req: Any object exposing a ``url`` attribute.
    """
    print(f'Request: {req.url}')
async def run():
    """Launch a headless browser and run ``fetch`` for every entry in URLS.

    All fetches are scheduled concurrently and awaited together. The
    browser is closed even when one of the fetches raises, so a failure
    does not leave the browser running.
    """
    browser = await launch(headless=True, args=['--no-sandbox'])
    try:
        # gather() wraps each coroutine in a task, so they run concurrently.
        await asyncio.gather(*(fetch(url, browser) for url in URLS))
    finally:
        await browser.close()
# Create and install the event loop explicitly: asyncio.get_event_loop()
# with no current loop is deprecated and raises RuntimeError on recent
# Python versions. The loop is intentionally left open afterwards, as in
# the original script.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
future = asyncio.ensure_future(run(), loop=loop)
loop.run_until_complete(future)
print(f'It took {time.time()-start} seconds.')