Question

我迫切需要一次性批量处理 200 张截图。我的第一次尝试是按照官方指南写了一个简单脚本，并把它调用 200 次：

// Render one screenshot of `url` into `fname` with a dedicated PhantomJS
// process (phantomjs-node promise API) and report the outcome via `callback`.
// Every step returns its promise, so the chain is flat and a failure at any
// step reaches the single rejection handler at the end.
phantom.create()
  .then(function(instance) {
      console.log("1 - instance")
      phInstance = instance;
      return instance.createPage();
  })
  .then(function(page) {
      console.log("2 - page")
      sitepage = page;
      return page.open(url);
  })
  .then(function(status) {
      console.log("3 - render")
      return sitepage.property('clipRect', {top: 0, left: 0, width:3000,height:890});
  })
  .then(function() {
      return sitepage.render(fname);
  })
  .then(function(finished) {
      console.log("\t\t\t---> finished");
      // Wait for the page to close before tearing the process down.
      return sitepage.close();
  })
  .then(function() {
      // Only the instance owns the child process; `phantom` here is the
      // phantomjs-node module, which exposes create() but no exit().
      phInstance.exit();
      callback({msg: 'ok'})
  }, function(err) {
      console.error("screenshot failed:", err);
      // Do not leave an orphaned PhantomJS process behind on failure.
      if (typeof phInstance !== 'undefined' && phInstance) {
          phInstance.exit();
      }
      callback({msg: 'error', error: err})
  });

这种方法勉强可行，但对 CPU 的负担实在太重：问题在于这种做法会同时启动 200 个 PhantomJS 进程，很快就会耗尽所有内存。

更高效的方法是只创建一个 PhantomJS 实例，然后驱动它每次只打开一个页面并渲染。用原生 PhantomJS 脚本可以这样实现：

  // Native PhantomJS script setup: creates the single page that is reused for
  // every screenshot and loads the list of host names from sites.txt
  // (one host per line).
  var content, counter, f, fs, grab_screen, img, lines, next_screen, page, system, url;
  page = require('webpage').create();
  system = require('system');
  fs = require('fs');
  content = '';
  lines = [];
  url = '';
  img = '';
  counter = 0;

  page.viewportSize = {
    width: 1200,
    height: 800
  };

  page.settings.userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36';

  f = fs.open("sites.txt", "r");

  try {
    content = f.read();
  } finally {
    // Release the file handle even if reading throws.
    f.close();
  }

  // Trim each entry (handles "\r\n" files) and drop blank lines, so a trailing
  // newline does not turn into a bogus "http:///" target.
  lines = content.split("\n").map(function(line) {
    return line.trim();
  }).filter(function(line) {
    return line.length > 0;
  });

  // Open the site at lines[counter] in the shared page, render it to
  // screens/<site>.png when the load succeeded, then advance to the next
  // entry. Updates the outer `url`, `img` and `counter` variables and returns
  // whatever page.open returns.
  grab_screen = function() {
    var site;
    site = lines[counter];
    url = 'http://' + site + '/';
    img = 'screens/' + site + '.png';
    console.log("Grabbing screen for: " + url);
    return page.open(url, function(status) {
      // Short delay before capturing so the page has a moment to paint.
      return window.setTimeout(function() {
        if (status === 'success') {
          page.render(img);
        } else {
          // A failed load would only yield a blank or stale image; skip it
          // but keep the batch moving.
          console.log("Failed to load " + url + " (status: " + status + "), skipping");
        }
        counter++;
        return next_screen();
      }, 200);
    });
  };

  // Step of the capture loop: logs the current index, then either grabs the
  // next screenshot or, once every entry has been handled, exits PhantomJS.
  next_screen = function() {
    var hasMore;
    console.log("On to " + counter + " ...");
    hasMore = counter < lines.length;
    return hasMore ? grab_screen() : phantom.exit();
  };

  // Start the sequential capture loop with the first entry.
  next_screen();

所以我想知道如何使用phantomjs-node实现这一点。

Answer 1

我终于用两件事解决了我的问题：

意识到node.js不是多线程的。
使用单个 PhantomJS 实例来渲染多个网址。

最终的代码如下：

  /**
   * Render shots[id] with the shared PhantomJS instance, then continue with
   * the next shot; after the last one, report completion via cb() and start
   * PDF generation.
   *
   * The promise chain is flat and ends in a rejection handler, so a failing
   * shot is logged and its page closed instead of stalling silently with an
   * unhandled rejection.
   *
   * @param {number} id - index into `shots` of the screenshot to take.
   */
  var webshot = function(id) {
      var shot = shots[id];
      var page = null; // page owned by this invocation; null once closed
      console.log('makeshot ', shot.url);
      requestSync("POST", "http://localhost:4041/options/set", { json:{ opts:JSON.stringify(shot.options) } });
      phInstance.createPage().then(function(_page) {
          console.log("2 - page")
          page = _page;
          sitepage = _page;
          return _page.open(shot.url);
      })
      .then(function(status) {
          console.log("3 - render %s / %s", id, shots.length);
          return page.property('clipRect', {top: 0, left: 0, width:1500,height:220});
      })
      .then(function() {
          return page.render(shot.fname);
      })
      .then(function(finished) {
          var rendered = page;
          console.log("\t\t\t---> finished");
          page = null;
          // Wait for the page to close so pages do not pile up in the instance.
          return rendered.close();
      })
      .then(function() {
          // Explicit radix: pack_id is a decimal number.
          fnames[Math.ceil(parseInt(shot.options.pack_id, 10)/mt_per_snap)-1] = "localhost_" + shot.options.pack_id + ".png";
          if(id<shots.length-1) {
            // Not returned on purpose: each shot handles its own errors.
            webshot(id + 1);
          } else {
            console.log("all done: %s files has been written", shots.length);
            // invoke pdf generation for the pdf page
            cb("files_written", {  });
            generatePDF();
          }
      })
      .catch(function(err) {
          console.error("webshot failed for %s:", shot.url, err);
          if (page) {
            // Best-effort cleanup of the page that was still open.
            page.close().catch(function(closeErr) {
              console.error("closing page failed:", closeErr);
            });
            page = null;
          }
      });
  }

长话短说：我把要渲染的页面放在一个单独的脚本里，并在截图之前通过变量向它传入参数，这就解决了“多线程问题”。此外，我只使用一个名为 phInstance 的变量，其声明如下：

  /**
   * Start the single shared PhantomJS process and store it in `phInstance`.
   *
   * @returns {Promise} resolves with the created instance once `phInstance`
   *   is set, so callers can wait for it before taking screenshots; rejects
   *   if PhantomJS could not be started.
   */
  var initPhantom = function() {
    return phantom.create()
        .then(function(instance) {
            console.log("1 - instance")
            phInstance = instance;
            return instance;
        })
  }

记得在完成后终止 PhantomJS 实例，否则它会一直驻留在那里，持续占用你的资源。

Answer 2

您可以试试 webshot 之类的库。我把它和 async.js 搭配使用，但有时会遇到 `Error: PhantomJS exited with return value 1`，目前还没有找到原因。

// Take one screenshot per entry in `links` via webshot and hand the
// collected results to the final callback.
// NOTE(review): async.map presumably starts all iteratees concurrently — for
// 200 links a concurrency-limited variant may be needed; confirm against the
// async documentation.
async.map(
    links,
    // Iteratee: renders `link` to ./screenshots/<folder>/screenshot.png.
    function(link, cb) {
        // `{...}` is a placeholder from the original answer, not valid JS as written.
        var config = {...}; // your webshot options
        var folder = link; // make unique folder name from link?
        var file = path.join('./', 'screenshots', folder, 'screenshot.png');
        // Forward webshot's error (if any) together with the link to async.
        webshot(link, file, config, function(err) {
            cb(err, link);
        });
    },
    // Final callback: receives the error `e` (if any) and the mapped `links`.
    function(e, links) {
        // done
    }
);

资源：

https://www.npmjs.com/package/webshot https://www.npmjs.com/package/asyncjs

使用phantom为nodejs批量截图

2 个答案: