我的代码非常简单,爬虫对象是一个木偶操作者实例:
crawler.selectorReturner = async function(page, selector) {
await page.waitForSelector(selector);
var returnSelector = async function(){
return selector;
}
await page.exposeFunction('returnSelector', returnSelector);
var getSelections = await page.evaluate(
() => {
var resultsobj = {
selections: []
};
var selector = returnSelector();
var selections = Array.from(document.body.querySelectorAll(selector), ({ selected }) => { return selected; });
resultsobj.selections = selections;
return resultsobj;
}
);
return getSelections;
}
我如何使用crawler.selectorReturner
的示例 const initialhrefsObj = await crawler.selectorReturner(page,"a[href]");
但是我尝试了各种各样的方法,我无法让document.body.querySelectorAll(selector)工作
Evaluation failed: DOMException: Failed to execute 'querySelectorAll' on 'Element': '[object Promise]' is not a valid selector.
我显然不希望硬编码的选择器有一堆函数我希望将它传递给页面评估。如何传入一个字符串并让该字符串在我的page.evaluate中可用?
答案 0 :(得分:1)
#evaluate在位置2之前采用可变参数,因此您可以自由地传递选择器。这个应该看起来像:
var getSelections = await page.evaluate(
(selectorString) => {
var resultsobj = {
selections: []
};
var selector = returnSelector(selectorString);
var selections = Array.from(document.body.querySelectorAll(selector), ({ selected }) => { return selected; });
resultsobj.selections = selections;
return resultsobj;
},
'.my-selector' // Selector argument here
);