使用 Node.js 对网址进行屏幕截图(异步)

时间:2018-09-17 08:06:51

标签: node.js

这是我的代码:

const fs = require('fs');
const screenshot = require('screenshot-stream');
const urlp = require('url');
// Pages to capture; one PNG named "<hostname>test-1024x768.png" is written per entry.
var urls=[
'https://archive.org/details/8bitrecs',
'http://hackaday.com/',
'http://techcrunch.com/2012/02/16/auraslate-is-an-open-source-android-tablet-for-hackers/',
'http://www.english.illinois.edu/-people-/faculty/debaron/482/482readings/greenfield.html',
'http://sustain.rca.ac.uk/Sustain-Talks'];
urls.forEach(function(url){
    const target = urlp.parse(url).hostname + 'test-1024x768.png';
    const stream = screenshot(url, '1024x768', {crop: true});
    const out = fs.createWriteStream(target);

    // pipe() does not forward errors from the source to the destination, and an
    // 'error' event with no listener terminates the process. Handle the failure
    // here so one unreachable URL does not abort the remaining captures.
    stream.on('error', function(err){
        console.error('Screenshot failed for ' + url + ': ' + err.message);
        // Close the output file; nothing more will be written to it.
        out.end();
    });

    // A failure to write the file (bad path, full disk, ...) is reported the same way.
    out.on('error', function(err){
        console.error('Could not write ' + target + ': ' + err.message);
    });

    stream.pipe(out);
});

它只对网址列表中的最后一项成功截图,其余的都是零字节的图像。我认为我需要异步地进行操作,以免每次都覆盖流。

我该怎么做?

更新: 我希望屏幕截图能够正常工作;如果某个网址无法访问,则应捕获该错误,并且不阻塞其余网址的截图。

更新: https://www.npmjs.com/package/screenshot-promise的效果更好,尽管下面的这段代码仍然使我的计算机运行速度大大降低!

const screenshotPromise = require('screenshot-promise');

...

// Capture the pages one after another instead of starting every capture at
// once, and keep going when a single URL fails.
(async function() {
    for (const url of urls) {
        const file = urlp.parse(url).hostname + 'test-1024x768.png';
        try {
            // screenshotPromise resolves with the image data, which is written to disk as-is.
            const buf = await screenshotPromise(url, '1024x768', {crop: true});
            fs.writeFileSync(file, buf);
            console.log('Saved ' + file);
        } catch (err) {
            // Report the failure and move on to the next URL.
            console.error('Screenshot failed for ' + url + ': ' + err.message);
        }
    }
})();

2 个答案:

答案 0 :(得分:1)

您在这里没有贴出的是错误信息:

  

events.js:160
        throw er; // Unhandled 'error' event
        ^

     

Error: Couldn't load url: http://techcrunch.com/2012/02/16/auraslate-is-an-open-source-android-tablet-for-hackers/
    at LineStream.byline.on.data (e:---\node_modules\screenshot-stream\index.js:77:16)
    at emitOne (events.js:96:13)
    at LineStream.emit (events.js:188:7)

问题在于 screenshot-stream 模块使用的是 PhantomJS,而 PhantomJS 无法加载错误信息中所示的那个页面。

此错误似乎与以下问题有关:https://github.com/ariya/phantomjs/issues/10460

  

Techcrunch.com和Aol.com似乎使用了Qt加载为应用程序字体的网络字体(例如“ BebasNeue-webfont.ttf”)。那里可能出了什么问题。

我的建议是使用Google的Puppeteer,其中包括内置的屏幕截图方法:https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#pagescreenshotoptions

答案 1 :(得分:0)

最后,我写的下面这段代码可以正常工作:

   const puppeteer = require('puppeteer');
const urlp = require('url');
var URL = require('url-parse');
// Pages to capture; the loop further down visits them in order and saves one
// screenshot per entry, named after the entry's hostname.
var urls = [
    'https://archive.org/details/8bitrecs',
    'http://hackaday.com/',
    'http://techcrunch.com/2012/02/16/auraslate-is-an-open-source-android-tablet-for-hackers/',
    'http://www.english.illinois.edu/-people-/faculty/debaron/482/482readings/greenfield.html',
    'http://sustain.rca.ac.uk/Sustain-Talks',
    'https://www.quintessentially.com/',
    'https://www.producthunt.com/tech/ux-project-checklist',
    'https://freedom.press/',
    'http://issuu.com/search?q=vintage+motorcycle',
    'http://www.pocketmod.com/v2/',
    'https://www.metamind.io/',
    'http://nautil.us/blog/chernobyls-hot-mess-the-elephants-foot-is-still-lethal',
    'https://www.instructables.com/id/Tool-Storage-Hacks-or-How-to-Hang-Those-Black-Frid/',
    'https://www.zippi.co.uk/framed-photo-print'
];
/**
 * Split a URL string into its components.
 *
 * The previous version built an <a> element through `document`, which only
 * exists in a browser; under Node every call threw a ReferenceError. This
 * version uses the `URL` constructor that is in scope for the script instead.
 *
 * @param {string} href - Absolute URL to parse.
 * @returns {object} Parsed URL exposing `href`, `hostname`, `pathname`, etc.
 */
var getLocation = function(href) {
    return new URL(href);
};
/**
 * Visit every entry of `urls` in order with a single browser page and save a
 * screenshot of each one as images/<hostname>.png.
 *
 * A URL that cannot be parsed, loaded or captured is logged and skipped, so
 * one bad page does not stop the run. The browser is always closed at the end
 * so that the Node process can exit.
 *
 * NOTE(review): the images/ directory is not created here; presumably it must
 * already exist or every screenshot() call will fail — confirm before running.
 */
(async() => {
    // Upper bound for a single page load, in milliseconds.
    const NAV_TIMEOUT_MS = 40000;

    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        for (const url of urls) {
            try {
                const hostname = new URL(url).hostname;
                // The timeout is given to goto() itself. The earlier un-awaited
                // page.waitForNavigation({timeout}) call did not configure goto()
                // and left a floating promise behind.
                await page.goto(url, {timeout: NAV_TIMEOUT_MS});
                await page.screenshot({
                    path: 'images/' + hostname + '.png'
                });
            } catch (error) {
                // Best effort: report the failure and carry on with the next URL.
                console.log(error.message);
            }
        }
    } finally {
        // Without this the browser child process keeps the script alive forever.
        await browser.close();
    }
})().catch((error) => {
    // Failures outside the per-URL handling (e.g. the browser could not launch).
    console.error(error.message);
    process.exitCode = 1;
});