如何使用Puppeteer打印HTML文档?

时间:2018-11-10 16:51:24

标签: javascript node.js web-crawler google-chrome-devtools puppeteer

最近,我开始使用 Puppeteer 抓取网页。下面是从购物网站提取特定商品名称的代码。

const puppeteer = require('puppeteer');

/**
 * Opens the Naver Shopping home page in a visible browser window, types the
 * search term '양말' into the search box, submits it, and logs the titles of
 * the first ten results matching `div.info > a.tit`.
 *
 * The browser is always closed, even when a step fails, and any failure is
 * reported on stderr with a non-zero exit code.
 */
(async () => {
    const width = 1600;
    const height = 1040;

    // Plain timer-based pause. Used instead of page.waitFor(ms), which is
    // deprecated/removed in newer Puppeteer releases.
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    const option = {
        headless: false,
        // slowMo is a delay in milliseconds applied to Puppeteer operations;
        // the original passed `true`, which is not a valid value. Tune as needed.
        slowMo: 50,
        args: [`--window-size=${width},${height}`],
    };

    const browser = await puppeteer.launch(option);
    try {
        const page = await browser.newPage();
        await page.setViewport({ width, height });

        // goto() itself resolves once the page has loaded, so a separate
        // waitForNavigation() promise is unnecessary (and would be left
        // unhandled if goto() threw).
        await page.goto('https://shopping.naver.com/home/p/index.nhn');
        await sleep(2000);

        // Note: despite its name, this value is used as a CSS class selector.
        const textBoxId = 'co_srh_input';
        await page.type(`.${textBoxId}`, '양말', { delay: 100 });
        await page.keyboard.press('Enter');

        // Give the search results page time to replace the home page before
        // looking for result links.
        await sleep(5000);
        await page.waitForSelector('div.info > a.tit');

        const stores = await page.evaluate(() => {
            const links = Array.from(document.querySelectorAll('div.info > a.tit'));
            // Keep only the first 10 product titles.
            return links.map((link) => link.innerText).slice(0, 10);
        });

        console.log(stores);
    } finally {
        // Release the browser process even if scraping failed part-way.
        await browser.close();
    }
})().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});

我有一个问题。如何将爬网的结果输出到HTML文档(不使用数据库)?请使用示例代码进行解释。

2 个答案:

答案 0 :(得分:2)

fs.writeFile()

您可以使用以下 write_file 函数,它返回一个 Promise:当 fs.writeFile() 成功时该 Promise 会 resolve,失败时会 reject:

然后,您可以在匿名异步函数中 await 该 Promise,并检查数据是否已写入文件:

'use strict';

const fs = require('fs');
const puppeteer = require('puppeteer');

/**
 * Writes `data` to `file` as UTF-8 text using fs.writeFile().
 *
 * @param {string} file - Path of the file to write.
 * @param {string} data - Text content to write.
 * @returns {Promise<boolean>} Resolves with `true` once the write succeeded.
 *   Rejects with the error reported by fs.writeFile() when the write failed.
 */
const write_file = (file, data) => new Promise((resolve, reject) => {
  fs.writeFile(file, data, 'utf8', error => {
    if (error) {
      console.error(error);
      // Reject with the real error (not a bare `false`) so callers can
      // inspect the message, code and stack of what went wrong.
      reject(error);
      return;
    }
    resolve(true);
  });
});

/**
 * Usage sketch: collects the first ten result titles from the page and writes
 * them to example.html. The `// ...` markers stand for the browser/page setup
 * and teardown, which are not shown here.
 */
(async () => {

  // ...

  const stores = await page.evaluate(() => {
    // Keep only the first 10 product titles.
    return Array.from(document.querySelectorAll('div.info > a.tit'), link => link.innerText).slice(0, 10);
  });

  // write_file reports failure by rejecting its Promise, and `await` turns a
  // rejection into a thrown exception. Comparing the awaited value with
  // `false` would therefore never match; the failure has to be caught.
  try {
    await write_file('example.html', stores.toString());
  } catch (error) {
    // write_file already logs the underlying error; add the context here.
    console.error('Error: Unable to write stores to example.html.');
  }

  // ...

})(); // Invoke the function; without the trailing () it is only defined, never run.

答案 1 :(得分:1)

我使用了 blog.kowalczyk.info 上展示的方法:
const puppeteer = require("puppeteer");
const fs = require("fs");

/**
 * Launches a browser with Puppeteer's default options, loads
 * https://www.google.com/, and saves the page's HTML to index.html in the
 * current working directory.
 *
 * The browser is closed even if loading the page or writing the file fails.
 *
 * @returns {Promise<void>} Resolves once the file is written and the browser
 *   is closed; rejects with whatever error the failing step threw.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://www.google.com/", { waitUntil: "networkidle2" });
    // hacky defensive move but I don't know a better way:
    // wait a bit so that the browser finishes executing JavaScript.
    // A plain timer is used because page.waitFor() is deprecated/removed in
    // newer Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, 1 * 1000));
    const html = await page.content();
    fs.writeFileSync("index.html", html);
  } finally {
    // Always release the browser process, including on failure.
    await browser.close();
  }
}

// Surface failures instead of leaving the returned promise unhandled.
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});