Puppeteer 无法识别 link 变量

时间:2019-02-07 18:17:37

标签: node.js puppeteer

我正在尝试抓取一些 HTML 内容,但不知为何遇到了下面的错误:

Error: Evaluation failed: ReferenceError: link is not defined
    at __puppeteer_evaluation_script__:8:29
    at ExecutionContext.evaluateHandle (C:\Repositories\auto-grabber-server\node_modules\puppeteer\lib\ExecutionContext.js:124:13)
    at process._tickCallback (internal/process/next_tick.js:68:7)
  -- ASYNC --
    at ExecutionContext.<anonymous> (C:\Repositories\auto-grabber-server\node_modules\puppeteer\lib\helper.js:144:27)
    at ExecutionContext.evaluate (C:\Repositories\auto-grabber-server\node_modules\puppeteer\lib\ExecutionContext.js:58:31)
    at ExecutionContext.<anonymous> (C:\Repositories\auto-grabber-server\node_modules\puppeteer\lib\helper.js:145:23)
    at Frame.evaluate (C:\Repositories\auto-grabber-server\node_modules\puppeteer\lib\FrameManager.js:447:20)
    at process._tickCallback (internal/process/next_tick.js:68:7)
  -- ASYNC --
    at Frame.<anonymous> (C:\Repositories\auto-grabber-server\node_modules\puppeteer\lib\helper.js:144:27)
    at Page.evaluate (C:\Repositories\auto-grabber-server\node_modules\puppeteer\lib\Page.js:777:43)
    at Page.<anonymous> (C:\Repositories\auto-grabber-server\node_modules\puppeteer\lib\helper.js:145:23)
    at zrGrabber.StartGrabbingHtml (C:\Repositories\auto-grabber-server\grabbers\zr.grabber.js:52:40)
    at process._tickCallback (internal/process/next_tick.js:68:7)

link 已经传递给 StartGrabbingHtml 函数,但随后仍然报错。我猜是异步处理的某个环节出了问题,但无法确定具体原因。

const puppeteer = require("puppeteer");
// Module-level list of article links; zrGrabber.startGrabbingLinks appends
// the links it scrapes from the listing pages to this array.
let links = [];
const Mongo = require('./../db/mongo');
// Listing pages that zrGrabber.startGrabbingLinks visits to collect article links.
const zrLinks = [
    "https://www.zr.ru/stories/consultant/optimalno/",
    "https://www.zr.ru/news/avtomobili/",
    "https://www.zr.ru/stories/prezentaciya-car/new/"
];

module.exports = class zrGrabber {
    async startGrabbingLinks() {
        try {
            for (let i = 0; i < zrLinks.length; i++) {
                const browser = await puppeteer.launch();
                const page = await browser.newPage();
                await page.goto(zrLinks[i], {
                    waitUntil: 'load',
                    timeout: 0
                });
                const result = await page.evaluate(() => {
                    const links = document.querySelectorAll('div.head > h2 > a')
                    return [...links].map(link => link.href);
                });
                await page.close();
                await browser.close();
                links = [...links, ...result];
            }
            const db = new Mongo();
            for (let i = 0; i < links.length; i++) {
                // if link already in database skip grabbing
                const found = await db.findLink(links[i]);
                if (found) {
                    continue;
                }
                // else grab and write link to database
                await this.StartGrabbingHtml(links[i])
            }
        } catch (err) {
            console.log(err)
        }
    }

    async StartGrabbingHtml(link) {
        try {
            const browser = await puppeteer.launch();
            const page = await browser.newPage();
            await page.goto(link, {
                waitUntil: 'load',
                timeout: 0
            });
            const article = await page.evaluate(() => { // error throwing here
                const date = document.querySelector('#storyDetailArticle > time').innerHTML;
                const name = document.querySelector('#storyDetailArticle > h1').innerHTML;
                const description = document.querySelector('#storyDetailArticle > div.stroy_announcement > h3').innerHTML;
                const author = document.querySelector('#storyDetailArticle > div.announcement_author.story_author.no_preview > div').innerHTML;
                const content = document.querySelector('#storyDetailArticle > div.stroy_content').innerHTML;
                return {
                    source: link,
                    date: date,
                    name: name,
                    description: description,
                    author: author,
                    content: content
                };
            });
            console.log(article)
            const db = new Mongo();
            await db.insertOne(article);
            await page.close();
            await browser.close();
        } catch (err) {
            console.log(err)
        }
    }
}

我在这里做错了什么?

1 个答案:

答案 0 :(得分:1)

传给 page.evaluate 的函数是在浏览器页面的上下文中执行的,因此无法访问 Node.js 作用域中的变量 link。

您应该像下面这样把它作为参数传入:

// Pass `link` as an extra argument after the function so that it is
// available as a parameter inside the page context.
await page.evaluate(link => {
    // ...
}, link);