我正在尝试使用以下代码片段抓取 YouTube 视频页面:
// Scrolls the YouTube video grid into view repeatedly until a scroll no longer
// adds child nodes, then maps every child node of the grid through parseJson
// and returns the resulting array to the Node side.
//
// The callback passed to page.evaluate runs inside the page, so it only uses
// its own argument (scroll_delay, the pause in milliseconds after each scroll)
// and browser globals.
// NOTE(review): the "o is not defined" failure reported on Lambda presumably
// comes from a bundler/minifier rewriting this callback so that it calls a
// helper that exists only in the Node bundle — confirm against the deployed
// bundle.
let videos = await page.evaluate(async (scroll_delay) => {
  const GRID_SELECTOR = 'div#items.style-scope.ytd-grid-renderer';

  // Resolves after `ms` milliseconds; gives lazily loaded items time to render.
  function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  // Looks the grid up afresh on every call and fails with a descriptive error
  // instead of an opaque null dereference when it is not in the document.
  function findGrid() {
    const grid = document.querySelector(GRID_SELECTOR);
    if (grid === null) {
      throw new Error(`Video grid not found: ${GRID_SELECTOR}`);
    }
    return grid;
  }

  // Placeholder mapper: currently returns an empty object for every node.
  function parseJson(node) {
    return {};
  }

  let nodes = [];
  let preCount = 0;
  let postCount = 0;

  do {
    const grid = findGrid();
    preCount = grid.childNodes.length;
    grid.scrollIntoView({
      behavior: 'smooth',
      block: 'end',
      inline: 'end',
    });
    await sleep(scroll_delay);
    // Query again after the pause: the count is compared against the state of
    // the document after the scroll had time to load more items.
    nodes = Array.from(findGrid().childNodes);
    postCount = nodes.length;
  } while (postCount > preCount); // stop once a scroll adds no new nodes

  // Declared locally so nothing leaks onto the page's global object.
  const videosObject = nodes.map(parseJson);
  return videosObject;
}, 500);
此代码在本地可以正常运行。我还在 EC2 机器上模拟了 Lambda 的运行环境,使用相同版本的 Chrome 和 puppeteer,它在那里同样可以正常运行。但是当我在 Lambda 上实际运行它时,它不断抛出下面这个错误。我还添加了 page.onerror
以尝试捕获浏览器端抛出的错误,但没有捕获到任何内容。
2021-07-22T01:26:10.428Z cf56ce0c-b56c-4d46-97a8-926bd907043b ERROR Error: Evaluation failed: ReferenceError: o is not defined
at t (__puppeteer_evaluation_script__:1:26)
at __puppeteer_evaluation_script__:1:314
at ExecutionContext._evaluateInternal (/var/task/node_modules/puppeteer-core/lib/cjs/puppeteer/common/ExecutionContext.js:217:19)
at processTicksAndRejections (internal/process/task_queues.js:97:5)
at ExecutionContext.evaluate (/var/task/node_modules/puppeteer-core/lib/cjs/puppeteer/common/ExecutionContext.js:106:16)