Question

在我的NodeJS学习过程中，我在一本书（NodeJS in Practice）中找到了这个示例代码，该书使用流来查找来自另一个流的数据中的一些匹配。

var Writable = require('stream').Writable;
var util = require('util');
module.exports = CountStream;
// Make CountStream a Writable subclass. The function declaration below is
// hoisted, so it can be referenced before its definition.
util.inherits(CountStream, Writable);

/**
 * Writable stream that counts case-insensitive occurrences of a pattern in
 * the data written to it and reports the result through a 'total' event.
 *
 * Each chunk is searched on its own, so an occurrence that is split across
 * two chunks is not counted.
 *
 * @param {string} matchText - Regular-expression source to search for.
 * @param {Object} [options] - Options passed through to Writable.
 */
function CountStream(matchText, options) {
    Writable.call(this, options);
    this.count = 0;
    this.matcher = new RegExp(matchText, 'ig');

    // Report the total once the stream has finished. Listening for 'finish'
    // (instead of overriding end()) keeps Writable's own end() intact, so a
    // final chunk passed to end(chunk) is still written, an end callback is
    // still called and 'finish' is still emitted.
    var self = this;
    this.once('finish', function() {
        self.emit('total', self.count);
    });
}

/**
 * Counts the matches found in one chunk and adds them to the running total.
 *
 * @param {Buffer|string} chunk - Data to search.
 * @param {string} encoding - Unused; the chunk is converted with toString().
 * @param {Function} cb - Called once the chunk has been processed.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
    var matches = chunk.toString().match(this.matcher);
    if (matches) {
        this.count += matches.length;
    }
    cb();
};

使用流的代码：

var CountStream = require('./countstream');
var countStream = new CountStream('book');
var http = require('http');

// Fetch the page and stream the response body into the counter.
http.get('http://www.manning.com', function(res) {
    // pipe() ends countStream when the response ends, which triggers 'total'.
    res.pipe(countStream);
}).on('error', function(err) {
    // Without this handler a DNS or connection failure would be thrown as an
    // uncaught 'error' event and crash the process.
    console.error('Request failed:', err.message);
});

// Print the number of matches once the whole response has been consumed.
countStream.on('total', function(count) {
    console.log('Total matches:', count);
});

如果匹配在两个数据块中中断，是不是可能丢失一些匹配？

例如，第一个数据块包含 'This a bo'，第二个数据块包含 'ok of mine.'。两个数据块各自都不包含 book，但整个数据中包含 book。

找到所有匹配的最佳解决方案是什么？

Answer 1

所以，正如我在评论中解释的那样，如果你知道正则表达式所能匹配的字符串的最大长度（关于如何计算最大长度，请参阅这个非常好的回答：https://stackoverflow.com/a/31173778/4114922 ），你可以缓存前一个数据块，并将其与新的数据块拼接起来。通过这种方法，我认为不会丢失任何匹配。

var Writable = require('stream').Writable;
var util = require('util');
module.exports = CountStream;
// Make CountStream a Writable subclass. The function declaration below is
// hoisted, so it can be referenced before its definition.
util.inherits(CountStream, Writable);

/**
 * Writable stream that counts case-insensitive occurrences of a pattern in
 * the data written to it, including occurrences that are split across two
 * consecutive chunks, and reports the result through a 'total' event.
 *
 * @param {string} matchText - Regular-expression source to search for.
 * @param {number} maxPatternLength - Length of the longest string the
 *     pattern can match. Up to maxPatternLength - 1 trailing characters of
 *     the searched text are carried over to the next chunk. When omitted or
 *     not greater than 1, nothing is carried over.
 * @param {Object} [options] - Options passed through to Writable.
 */
function CountStream(matchText, maxPatternLength, options) {
    Writable.call(this, options);
    this.count = 0;
    this.matcher = new RegExp(matchText, 'ig');

    this.previousCache = undefined;
    this.maxPatternLength = maxPatternLength;

    // Report the total once the stream has finished. Listening for 'finish'
    // (instead of overriding end()) keeps Writable's own end() intact, so a
    // final chunk passed to end(chunk) is still written, an end callback is
    // still called and 'finish' is still emitted.
    var self = this;
    this.once('finish', function() {
        self.emit('total', self.count);
    });
}

/**
 * Searches the carried-over tail plus the new chunk, adds the matches to the
 * running total and stores the tail to carry over to the next chunk.
 *
 * @param {Buffer|string} chunk - Data to search.
 * @param {string} encoding - Unused; the chunk is converted with toString().
 * @param {Function} cb - Called once the chunk has been processed.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
    var carried = this.previousCache === undefined ? '' : this.previousCache;
    var text = carried + chunk.toString();
    var matcher = this.matcher;
    var lastMatchEnd = 0;
    var match;

    // exec() is used instead of match() because the end position of the last
    // match is needed below.
    matcher.lastIndex = 0;
    while ((match = matcher.exec(text)) !== null) {
        this.count += 1;
        lastMatchEnd = match.index + match[0].length;
        if (match[0].length === 0) {
            // An empty match does not advance lastIndex; step past it so the
            // loop terminates.
            matcher.lastIndex += 1;
        }
    }

    // A match that straddles the boundary has at most maxPatternLength - 1
    // characters in the text seen so far, so that is all that must be kept.
    var keep = this.maxPatternLength > 1 ? this.maxPatternLength - 1 : 0;

    // Never keep characters that belong to an already counted match,
    // otherwise that match would be counted again with the next chunk.
    var tailStart = Math.max(lastMatchEnd, text.length - keep);
    this.previousCache = text.substring(tailStart);

    cb();
};

这段代码有可能丢失一些匹配吗？

1 个答案: