phantom.exit()在与async.waterfall()一起使用时立即运行

时间:2017-11-29 14:55:23

标签: asynchronous phantomjs async.js

我有一组希望PhantomJS访问的网址,并使用async.map()并行地将它们保存为html。由于必须关闭已打开的页面以释放RAM,我认为需要调用page.close(),并且已经这样做了。

但是,我还想在全部完成后退出Phantom。我尝试通过添加async.waterfall()来实现这一点,但它立即退出了。

我该怎么做?

// PhantomJS script: for every entry in `urls`, open the URL in its own page,
// write the page's HTML to ../public/html/<html>.html, close the page, and
// finally call phantom.exit().
// NOTE(review): as written, phantom.exit() is reached before any page has been
// rendered; see the comments next to callback() and callback2() below.
var fs = require("fs");
var async = require("async");

// Pages to snapshot: `url` is the address to open, `html` is the base name of
// the output file.
var urls = [
  {"url": "https://www.google.com", "html": "google"},
  {"url": "http://yahoo.com", "html": "yahoo"}
];


async.waterfall([

  // Only step of the waterfall: start one page load per entry in `urls`.
  function (callback2) {

    // Iteratee: `a` is one entry of `urls`, `callback` is the per-item
    // completion callback supplied by async.map.
    async.map(urls, function (a, callback) {
      var resourceWait = 300,    // ms without new requests before rendering
        maxRenderWait = 5000,    // ms after a successful open before rendering anyway
        url = a.url;

      var page = require('webpage').create(),
        count = 0,               // requests issued minus responses finished
        forcedRenderTimeout,     // timer id; assigned below but never cleared
        renderTimeout;           // timer id of the pending "network is quiet" render

      page.viewportSize = {width: 1440, height: 900};

      // Writes the page's current HTML to disk and closes the page.
      // It does not call `callback`, and nothing prevents it from running a
      // second time when both timers fire.
      function doRender() {
        var content = page.content;

        var path = '../public/html/' + a.html + '.html';
        fs.write(path, content, 'w');
        page.close();

      }

      // A new request started: count it and cancel any pending render.
      page.onResourceRequested = function (req) {
        count += 1;
        clearTimeout(renderTimeout);
      };

      // A response finished (no stage, or stage 'end'): when nothing is
      // outstanding any more, schedule a render after `resourceWait` ms.
      page.onResourceReceived = function (res) {
        if (!res.stage || res.stage === 'end') {
          count -= 1;
          if (count === 0) {
            renderTimeout = setTimeout(doRender, resourceWait);
          }
        }
      };

      // Start loading. On success, arm a fallback render after `maxRenderWait`
      // ms; on failure only a message is logged (the page is not closed here).
      page.open(url, function (status) {
        if (status !== "success") {
          console.log('Unable to load url');
        } else {
          forcedRenderTimeout = setTimeout(function () {
            doRender();
          }, maxRenderWait);
        }
      });
      // NOTE(review): invoked synchronously, right after page.open() has been
      // issued, so the item is reported as done before doRender has run.
      callback();
    });
    // async.map above is called without a third (final) callback argument.

    // NOTE(review): invoked synchronously as well, immediately after
    // async.map() has been started, so the waterfall's final callback (and
    // with it phantom.exit()) runs without waiting for any page.
    callback2(null, 'done!');
  }
], function (err, result) {
  // Final waterfall callback: terminates PhantomJS.
  phantom.exit();
});  

1 个答案:

答案 0 :(得分:0)

async.map接受一个回调作为第三个参数,它会在所有并行调用完成后被调用。你需要在这个回调中调用callback2()。

您的代码目前只是启动async.map并立即调用callback2()。你应该在async.map回调中调用它。

// PhantomJS script: for every entry in `urls`, open the URL in its own page,
// wait until the network has been quiet for a moment (or a maximum wait has
// elapsed), write the page's HTML to ../public/html/<html>.html, close the
// page, and exit PhantomJS only after every page has been handled.
var fs = require("fs");
var async = require("async");

// Pages to snapshot: `url` is the address to open, `html` is the base name of
// the output file.
var urls = [
  {"url": "https://www.google.com", "html": "google"},
  {"url": "http://yahoo.com", "html": "yahoo"}
];


async.waterfall([

  // Only step of the waterfall: snapshot all pages in parallel and continue
  // once async.map reports that every item has completed.
  function (callback2) {

    // Iteratee: `a` is one entry of `urls`; `callback` must be invoked exactly
    // once, and only when this page is completely done (written or failed).
    async.map(urls, function (a, callback) {
      var resourceWait = 300,    // ms without new requests before rendering
        maxRenderWait = 5000,    // ms after a successful open before rendering anyway
        url = a.url;

      var page = require('webpage').create(),
        count = 0,               // requests issued minus responses finished
        finished = false,        // set once this page has been reported as done
        forcedRenderTimeout,
        renderTimeout;

      page.viewportSize = {width: 1440, height: 900};

      // Single exit point for this page: cancels both timers, closes the page
      // and reports completion to async.map. Safe to call more than once;
      // only the first call has any effect.
      // `savedPath` is the written file's path, or null when nothing was saved.
      function finish(savedPath) {
        if (finished) {
          return;
        }
        finished = true;
        clearTimeout(renderTimeout);
        clearTimeout(forcedRenderTimeout);
        page.close();
        // Failures are reported as a null result rather than as an error so
        // that one bad URL does not make async.map finish (and PhantomJS exit)
        // while the other pages are still loading.
        callback(null, savedPath);
      }

      // Writes the page's current HTML to disk, then finishes the page.
      function doRender() {
        if (finished) {
          // The other timer already rendered this page.
          return;
        }
        var path = '../public/html/' + a.html + '.html';
        try {
          fs.write(path, page.content, 'w');
        } catch (e) {
          // Without this, an exception here would leave `callback` uncalled
          // and the script would never reach phantom.exit().
          console.log('Unable to write ' + path + ': ' + e);
          finish(null);
          return;
        }
        finish(path);
      }

      // A new request started: count it and cancel any pending render.
      page.onResourceRequested = function () {
        count += 1;
        clearTimeout(renderTimeout);
      };

      // A response finished (no stage, or stage 'end'): when nothing is
      // outstanding any more, schedule a render after `resourceWait` ms.
      page.onResourceReceived = function (res) {
        if (finished) {
          return;
        }
        if (!res.stage || res.stage === 'end') {
          count -= 1;
          if (count === 0) {
            renderTimeout = setTimeout(doRender, resourceWait);
          }
        }
      };

      // Start loading. On success, arm a fallback render in case the network
      // never goes quiet; on failure, close the page and report it as done.
      page.open(url, function (status) {
        if (status !== "success") {
          console.log('Unable to load url');
          finish(null);
        } else if (!finished) {
          forcedRenderTimeout = setTimeout(doRender, maxRenderWait);
        }
      });
      // Deliberately no callback() here: calling it synchronously would mark
      // the item as done before the page has loaded.
    }, function (err) {
      // Runs once every iteratee has called its callback.
      callback2(err, 'done!');
    });
  }
], function (err, result) {
  if (err) {
    console.log(err);
  }
  phantom.exit();
});