我有一个函数,用来抓取页面上的一些数据,并把它们存储在变量 `result` 中。由于结果分布在多个页面上,这个函数会在 `for` 循环中被反复调用。
/**
 * Navigates to a listing page and extracts one record per result card.
 *
 * @param {object} page - Puppeteer-style page exposing `goto` and `evaluate`.
 * @param {string} query - URL of the listing page to load.
 * @returns {Promise<Array<{type: ?string, price: ?number, rooms: ?number,
 *   surface: ?number, postcode: ?string, url: ?string}>>} One object per
 *   `.card--result__body` element. A field is `null` when its element is
 *   missing from the card; numeric fields are `NaN` when the element exists
 *   but its text does not start with a decimal number.
 */
const PageScraping = async (page, query) => {
  await page.goto(query, { waitUntil: "networkidle2" });

  // The callback is executed in the page, not in Node, so every helper and
  // constant it needs has to be declared inside it (no outer closures).
  const result = await page.evaluate(() => {
    const PROPERTY_SELECTOR =
      '.card__information.card--result__information.card__information--property';
    const ROOMS_SELECTOR =
      '.card__informations.card--result__informations > .card__information.card--result__information.card__information--property > .abbreviation > .sr-only';
    const PRICE_SELECTOR = '.card--result__price > span > .sr-only';
    const LOCALITY_SELECTOR =
      '.card__information.card--results__information--locality.card__information--locality';

    // Looks the element up once; yields null when it is absent, otherwise
    // whatever `read` extracts from it.
    const pick = (card, selector, read) => {
      const node = card.querySelector(selector);
      return node && read(node);
    };

    // Explicit radix so the text is always read as a decimal number.
    const toInt = (text) => Number.parseInt(text, 10);

    return [...document.querySelectorAll('.card--result__body')].map((card) => ({
      type: pick(card, '.card__title-link', (node) => node.innerText),
      price: pick(card, PRICE_SELECTOR, (node) => toInt(node.innerText.replace('€', ''))),
      rooms: pick(card, ROOMS_SELECTOR, (node) => toInt(node.innerText.replace(' chambres', ''))),
      // The surface is taken from the fifth space-separated token of the text.
      surface: pick(card, PROPERTY_SELECTOR, (node) => toInt(node.innerText.split(' ')[4])),
      // The postcode is the first space-separated token of the locality text.
      postcode: pick(card, LOCALITY_SELECTOR, (node) => node.innerText.split(' ')[0]),
      url: pick(card, '.card__title.card--result__title > a', (node) => node.href),
    }));
  });

  return result;
};
之后,我需要在循环中把每一页的结果存入另一个汇总所有数据的变量。我在那里使用了 `push`,但出现了错误:
Cannot read property 'push' of undefined(无法读取 undefined 的属性 “push”)
// Question snippet: walks every result page and tries to collect the scraped
// rows. It is kept as posted because it reproduces the reported error.
// NOTE: all_results is declared without an initial value, so it is undefined
// when push is called on it below.
let all_results;
try{
let query = 'myurl'
await page.goto(query, { waitUntil: "networkidle2" });
// The page count is parsed from the label of the second-to-last pagination item.
let max_pages = parseInt(await page.$eval('.pagination > li:nth-last-child(2) > a > .button__label', o => o.innerText));
for(let i = 1; i < max_pages+1; i++){
console.log('page ',i,' of ',max_pages);
try{
const result = await PageScraping(page, query);
// Prepare the URL that the next iteration will scrape.
query = `urlpage${i+1}`;
// Throws a TypeError here because all_results is undefined; the inner catch
// below logs it, so the loop continues without collecting anything.
all_results.push(result);
console.log(all_results);
} catch(e){
console.log('ERROR: could not reach landing page',e.message);
}
}
} catch(error) {
console.log('ERROR: could not reach landing page',error.message);
} finally {
// Close the browser whether or not scraping succeeded; a failure to close is
// only logged.
try{
await browser.close();
}catch(browser_error){
console.log('ERROR cant close browser??', browser_error.message)
}
}
}
答案 0 :(得分:0)
以下代码可以正常运行。按照建议,我把 `all_results` 声明并初始化为空数组(`let all_results = [];`),这样调用 `push` 时它就不再是 undefined。
// Collects the scraped data: one entry per page, each entry being the array
// of records returned by PageScraping for that page.
let all_results = [];
try {
  const firstPageUrl = 'myurl';
  await page.goto(firstPageUrl, { waitUntil: "networkidle2" });

  // The page count is parsed from the label of the second-to-last pagination item.
  const max_pages = Number.parseInt(
    await page.$eval('.pagination > li:nth-last-child(2) > a > .button__label', o => o.innerText),
    10,
  );
  if (Number.isNaN(max_pages)) {
    // Without this the loop below would be skipped with no explanation.
    console.log('ERROR: could not read the number of pages from the pagination');
  }

  for (let i = 1; i <= max_pages; i++) {
    console.log('page ',i,' of ',max_pages);
    // Derive the URL from the page number instead of carrying it over from the
    // previous iteration: a failed page must not make the next iteration
    // re-scrape the same URL and shift every following page by one.
    const query = i === 1 ? firstPageUrl : `urlpage${i}`;
    try {
      const result = await PageScraping(page, query);
      all_results.push(result);
      console.log(all_results);
    } catch (e) {
      // Best effort: report the failing page and carry on with the next one.
      console.log(`ERROR: could not scrape page ${i}`, e.message);
    }
  }
} catch (error) {
  console.log('ERROR: could not reach landing page',error.message);
} finally {
  // Close the browser whether or not scraping succeeded; a failure to close is
  // only logged.
  try {
    await browser.close();
  } catch (browser_error) {
    console.log('ERROR cant close browser??', browser_error.message);
  }
}
}