在我的NodeJS学习过程中,我在一本书(NodeJS in Practice)中找到了这个示例代码,该书使用流来查找来自另一个流的数据中的一些匹配。
var Writable = require('stream').Writable;
var util = require('util');
// Export the constructor. The CountStream function declaration is hoisted,
// so it can be referenced here before its definition appears.
module.exports = CountStream;
// Link CountStream to Writable so instances inherit the writable-stream API.
util.inherits(CountStream, Writable);
/**
 * Writable stream that counts how often a pattern occurs in the data
 * written to it and reports the result through a 'total' event.
 *
 * Every chunk is searched on its own, so an occurrence that is split across
 * two consecutive chunks is not counted.
 *
 * @param {string} matchText - Pattern source handed to `new RegExp`; it is
 *   matched globally and case-insensitively.
 * @param {Object} [options] - Options forwarded to the `Writable` constructor.
 */
function CountStream(matchText, options) {
  Writable.call(this, options);
  this.count = 0;
  this.matcher = new RegExp(matchText, 'ig');
}

/**
 * Adds the number of matches found in one chunk to the running count.
 *
 * @param {Buffer|string} chunk - Data to search; converted with `toString()`.
 * @param {string} encoding - Unused.
 * @param {Function} cb - Called once the chunk has been processed.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
  var matches = chunk.toString().match(this.matcher);
  if (matches) {
    this.count += matches.length;
  }
  cb();
};

/**
 * Ends the stream and emits 'total' with the final count.
 *
 * Delegates to `Writable.prototype.end` first so that an optional final
 * chunk is still written (and counted) and the stream is properly ended;
 * the previous override skipped the base implementation entirely.
 *
 * @returns {CountStream} This stream, like `Writable.prototype.end`.
 */
CountStream.prototype.end = function() {
  Writable.prototype.end.apply(this, arguments);
  this.emit('total', this.count);
  return this;
};
使用流的代码:
// Example: download a page and count how often "book" appears in it.
var CountStream = require('./countstream');
var countStream = new CountStream('book');
var http = require('http');

// Attach the listener before the request starts so the result cannot be missed.
countStream.on('total', function(count) {
  console.log('Total matches:', count);
});

http.get('http://www.manning.com', function(res) {
  // Stream the response body into the counter; pipe() ends it when the body is done.
  res.pipe(countStream);
}).on('error', function(err) {
  // Without this handler a DNS or connection failure would crash the process
  // with an unhandled 'error' event.
  console.error('Request failed:', err.message);
});
如果匹配在两个数据块中中断,是不是可能丢失一些匹配?
例如,第一块数据包含'This a bo',另一块包含'ok of mine.'。两个数据块各自都不包含完整的'book',但拼接后的完整数据中包含'book'。
要找到所有匹配,最佳的解决方案是什么?
答案 0 :(得分:1)
正如我在评论中解释的那样,如果你知道正则表达式所能匹配的字符串的最大长度(关于如何计算最大长度,请参阅 https://stackoverflow.com/a/31173778/4114922 这个非常好的答案),就可以缓存前一个数据块的末尾部分,并将其拼接到新数据块的前面。通过这种方法,我认为不会丢失任何匹配。
var Writable = require('stream').Writable;
var util = require('util');
// Export the constructor. The CountStream function declaration is hoisted,
// so it can be referenced here before its definition appears.
module.exports = CountStream;
// Link CountStream to Writable so instances inherit the writable-stream API.
util.inherits(CountStream, Writable);
/**
 * Writable stream that counts how often a pattern occurs in the data
 * written to it, including occurrences split across two consecutive chunks,
 * and reports the result through a 'total' event.
 *
 * @param {string} matchText - Pattern source handed to `new RegExp`; it is
 *   matched globally and case-insensitively.
 * @param {number} [maxPatternLength] - Length of the longest string the
 *   pattern can match. When omitted or not a usable number it falls back to
 *   the length of `matchText`, which is only accurate for literal text.
 * @param {Object} [options] - Options forwarded to the `Writable` constructor.
 */
function CountStream(matchText, maxPatternLength, options) {
  Writable.call(this, options);
  this.count = 0;
  this.matcher = new RegExp(matchText, 'ig');
  this.previousCache = undefined;

  // An omitted length used to make the cache swallow the whole text
  // (substring(NaN)), re-counting everything; fall back to a sane default.
  var requestedLength = Number(maxPatternLength);
  var isUsable = isFinite(requestedLength) && requestedLength >= 0;
  this.maxPatternLength = isUsable ? requestedLength : String(matchText).length;
}

/**
 * Counts the matches in one chunk, prefixed with the unmatched tail carried
 * over from the previous chunk.
 *
 * @param {Buffer|string} chunk - Data to search; converted with `toString()`.
 * @param {string} encoding - Unused.
 * @param {Function} cb - Called once the chunk has been processed.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
  var text = this.previousCache === undefined
    ? chunk.toString()
    : this.previousCache + chunk.toString();

  var matcher = this.matcher;
  var lastMatchEnd = 0;
  var match;

  matcher.lastIndex = 0;
  while ((match = matcher.exec(text)) !== null) {
    this.count += 1;
    lastMatchEnd = match.index + match[0].length;
    if (match[0].length === 0) {
      // A zero-length match does not advance lastIndex; step over it manually
      // so the loop terminates.
      matcher.lastIndex += 1;
    }
  }

  // A match that continues into the next chunk needs at least one character
  // from it, so at most (maxPatternLength - 1) trailing characters matter.
  // Never carry text that was already consumed by a counted match, otherwise
  // that match would be counted a second time with the next chunk.
  var keep = Math.max(this.maxPatternLength - 1, 0);
  var tailStart = Math.max(lastMatchEnd, text.length - keep);
  this.previousCache = text.substring(tailStart);

  cb();
};

/**
 * Ends the stream and emits 'total' with the final count.
 *
 * Delegates to `Writable.prototype.end` first so that an optional final
 * chunk is still written (and counted) and the stream is properly ended;
 * the previous override skipped the base implementation entirely.
 *
 * @returns {CountStream} This stream, like `Writable.prototype.end`.
 */
CountStream.prototype.end = function() {
  Writable.prototype.end.apply(this, arguments);
  this.emit('total', this.count);
  return this;
};