我有一组希望 PhantomJS 访问的网址，并使用 async.map() 并行地把它们保存为 HTML。由于必须关闭已打开的页面才能释放内存，我认为需要调用 page.close()，这一点我已经做到了。
但是，我还想在全部完成后退出 PhantomJS。我尝试通过 async.waterfall() 来实现这一点，但脚本会立即退出。
我该怎么做？
// PhantomJS script (as posted in the question): opens every entry of `urls`
// in its own page, writes the page's HTML to ../public/html/<html>.html, and
// calls phantom.exit() from the final callback of async.waterfall.
var fs = require("fs");
var async = require("async");
// Each entry pairs the URL to open with the base name of its output file.
var urls = [
{"url": "https://www.google.com", "html": "google"},
{"url": "http://yahoo.com", "html": "yahoo"}
];
async.waterfall([
function (callback2) {
// NOTE(review): async.map is called here without a third (final) callback.
async.map(urls, function (a, callback) {
// resourceWait: delay (passed to setTimeout) before rendering once `count` drops to 0.
// maxRenderWait: delay (passed to setTimeout) before rendering is forced after a successful open.
var resourceWait = 300,
maxRenderWait = 5000,
url = a.url;
// count: incremented per requested resource, decremented per finished one.
var page = require('webpage').create(),
count = 0,
forcedRenderTimeout,
renderTimeout;
page.viewportSize = {width: 1440, height: 900};
// Writes the current page content to the output file, then closes the page.
// NOTE(review): forcedRenderTimeout is never cleared, so this can be invoked
// by both timers for the same page.
function doRender() {
var content = page.content;
var path = '../public/html/' + a.html + '.html';
fs.write(path, content, 'w');
page.close();
}
// A new request cancels any pending "network is quiet" render.
page.onResourceRequested = function (req) {
count += 1;
clearTimeout(renderTimeout);
};
// When a response has no stage or reaches stage 'end', count it as finished;
// once nothing is outstanding, schedule a render after resourceWait.
page.onResourceReceived = function (res) {
if (!res.stage || res.stage === 'end') {
count -= 1;
if (count === 0) {
renderTimeout = setTimeout(doRender, resourceWait);
}
}
};
page.open(url, function (status) {
if (status !== "success") {
// NOTE(review): on failure the page is only logged, not closed.
console.log('Unable to load url');
} else {
// Render after maxRenderWait even if `count` never returns to 0.
forcedRenderTimeout = setTimeout(function () {
doRender();
}, maxRenderWait);
}
});
// NOTE(review): this runs synchronously right after page.open() is started,
// i.e. before the open callback above or doRender can have run.
callback();
});
// NOTE(review): invoked directly after async.map() returns rather than from
// a completion callback; the question reports that the script exits immediately.
callback2(null, 'done!');
}
], function (err, result) {
phantom.exit();
});
答案 0 :(得分:0)
async.map 接受一个回调作为第三个参数，它会在所有并行调用完成之后被调用。你需要在这个回调中调用 callback2()。
您的代码目前只是启动 async.map，然后立即调用 callback2()。你应该改为在 async.map 的回调中调用它。
// PhantomJS script: opens every entry of `urls` in its own page (in parallel
// via async.map), saves each page's HTML to ../public/html/<html>.html, closes
// the page to release memory, and exits PhantomJS only after every page has
// been either saved or reported as failed.
var fs = require("fs");
var async = require("async");

// Each entry pairs the URL to open with the base name of its output file.
var urls = [
    {"url": "https://www.google.com", "html": "google"},
    {"url": "http://yahoo.com", "html": "yahoo"}
];

async.waterfall([
    function (callback2) {
        async.map(urls, function (a, callback) {
            // Delay before rendering once no resource request is outstanding.
            var resourceWait = 300;
            // Upper bound on waiting after a successful load before rendering anyway.
            var maxRenderWait = 5000;
            var url = a.url;
            var page = require('webpage').create();
            // Number of resource requests started but not yet finished.
            var count = 0;
            // Guards against finishing the same page twice (both timers can fire).
            var finished = false;
            var forcedRenderTimeout;
            var renderTimeout;

            page.viewportSize = {width: 1440, height: 900};

            // Single exit point for this page: cancels pending timers, closes
            // the page and tells async.map that this item is done.
            // `saved` is true when the HTML file was written.
            function finish(saved) {
                if (finished) {
                    return;
                }
                finished = true;
                clearTimeout(renderTimeout);
                clearTimeout(forcedRenderTimeout);
                page.close();
                // Failures are reported as a `false` result instead of an
                // error: async.map invokes its final callback on the first
                // error, which would exit PhantomJS while other pages are
                // still loading.
                callback(null, saved);
            }

            // Writes the current page content to disk, then finishes the page.
            function doRender() {
                if (finished) {
                    return;
                }
                var path = '../public/html/' + a.html + '.html';
                var saved = true;
                try {
                    fs.write(path, page.content, 'w');
                } catch (e) {
                    console.log('Unable to write ' + path + ': ' + e);
                    saved = false;
                }
                finish(saved);
            }

            // A new request cancels any pending "network is quiet" render.
            page.onResourceRequested = function (req) {
                count += 1;
                clearTimeout(renderTimeout);
            };

            // When a response has no stage or reaches stage 'end', count it as
            // finished; once nothing is outstanding, render after resourceWait.
            page.onResourceReceived = function (res) {
                if (!res.stage || res.stage === 'end') {
                    count -= 1;
                    if (count === 0) {
                        renderTimeout = setTimeout(doRender, resourceWait);
                    }
                }
            };

            page.open(url, function (status) {
                if (status !== "success") {
                    console.log('Unable to load url');
                    // Still close the page and complete this item so the
                    // script can exit.
                    finish(false);
                } else {
                    // Render after maxRenderWait even if `count` never
                    // returns to 0.
                    forcedRenderTimeout = setTimeout(function () {
                        doRender();
                    }, maxRenderWait);
                }
            });
            // Do NOT call callback() here: page.open() is asynchronous, so
            // the item is only complete once finish() has run.
        }, function (err) {
            // Runs once every page has called its callback.
            return callback2(err, 'done!');
        });
    }
], function (err, result) {
    if (err) {
        console.log('Error: ' + err);
    }
    phantom.exit();
});