我正在玩NodeJS,为此创建了一个电子邮件提取器。不知何故,当我创建多个http请求时,Windows任务管理器中的node.exe内存使用量不断增加。我知道节点需要更多的内存来处理请求,但我注意到即使在所有请求都已成功处理之后,这种内存使用量也没有下降。
当我启动nodejs时,它消耗大约35000K的内存,但在大约80-100的请求之后,它会上升到50000K并保持不变。
这是我的简单电子邮件提取程序模块:
var request = require('request'),
cheerio = require('cheerio'),
async = require('async'),
urlHelper = require('url');
function Extractor(config) {
this.baseUrl = config.url;
this.parsedUrl = urlHelper.parse(config.url);
this.urls = [];
this.emails = [];
}
Extractor.prototype.getEmails = function getEmails(html) {
var foundEmails = html.match(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi) || [];
if(foundEmails.length) this.emails = this.emails.concat(foundEmails);
}
Extractor.prototype.extract = function extract(html) {
var $ = cheerio.load(html),
that = this;
if($('body')){
this.getEmails($('body').html());
}
if(!this.emails.length){
$("a[href^='http://" + this.parsedUrl.host + "'], a[href^='https://" + this.parsedUrl.host + "'], a[href^='/'], a[href^='./'], a[href^='../']").each(function(k, v) {
that.urls.push(urlHelper.resolve(that.baseUrl, $(v).attr('href')));
});
}
};
/**
* Process the base URL
*/
Extractor.prototype.processBase = function processBase(next) {
request(this.baseUrl, function(err, response, body) {
return next(err, body);
});
}
/**
* Process the internal pages
*/
Extractor.prototype.processInternal = function processInternal(cb) {
var that = this;
async.whilst(
// while this condition returns true
function () { return that.emails.length === 0 && that.urls.length > 0; },
// do this
function (callback) {
request(that.urls.shift(), function (err, response, body) {
var $ = cheerio.load(body);
if($(body)){
that.getEmails($('body').html());
}
callback(); // async internal, needs to be called after we are done with our thing
});
},
// call this if any errors occur. An error also stops the series
// this is also called on successful completion of the series
function (err) {
cb(that);
}
);
}
Extractor.prototype.process = function process(next) {
var that = this;
this.processBase(function(err, html) {
if(err) {
console.log(err);
} else {
that.extract(html);
if(!that.emails.length) {
that.processInternal(function(res) {
return next(null, that);
});
}
}
});
}
module.exports = Extractor;
以下是我的称呼方式:
var express = require('express');
var router = express.Router();
var Extractor = require('../services/Extractor');
router.get('/', function(req, res) {
res.json({msg: 'okay'});
var extractor = new Extractor({url: 'http://lior-197784.use1-2.nitrousbox.com:4000/crawl'});
extractor.process(function(err, res) {});
});
module.exports = router;