这段代码有可能丢失一些匹配吗?

时间:2015-06-28 12:58:48

标签: javascript node.js

在我的NodeJS学习过程中,我在一本书(NodeJS in Practice)中找到了这个示例代码,该书使用流来查找来自另一个流的数据中的一些匹配。

// Base class that the stream defined in this module extends.
var Writable = require('stream').Writable;
var util = require('util');
// CountStream is a function declaration further down in this file, so it is
// hoisted and can be referenced here before its definition appears.
module.exports = CountStream;
// Link CountStream.prototype to Writable.prototype so that instances inherit
// the Writable API.
util.inherits(CountStream, Writable);

/**
 * Writable stream that counts case-insensitive occurrences of a pattern in
 * the data written to it and reports the result through a 'total' event.
 *
 * Every chunk is searched on its own, so an occurrence that is split across
 * two chunks is not counted.
 *
 * @param {string} matchText - Pattern source handed to `new RegExp`; regular
 *     expression metacharacters in it are interpreted, not escaped.
 * @param {Object} [options] - Options forwarded to the Writable constructor.
 */
function CountStream(matchText, options) {
    Writable.call(this, options);
    this.count = 0;
    this.matcher = new RegExp(matchText, 'ig');
}

/**
 * Adds the number of matches found in `chunk` to the running count.
 *
 * @param {Buffer|string} chunk - Data to search; converted with toString().
 * @param {string} encoding - Unused.
 * @param {Function} cb - Invoked synchronously once the chunk is processed.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
    var matches = chunk.toString().match(this.matcher);
    if (matches) {
        this.count += matches.length;
    }
    cb();
};

/**
 * Ends the stream and emits 'total' with the final count.
 *
 * Delegates to Writable.prototype.end first so that an optional final chunk
 * is written and the stream really finishes ('finish' fires, the optional
 * callback runs). Because _write completes synchronously, the count is final
 * by the time 'total' is emitted.
 *
 * @param {Buffer|string} [chunk] - Optional last piece of data.
 * @param {string} [encoding] - Encoding of `chunk` when it is a string.
 * @param {Function} [cb] - Optional callback for when the stream finishes.
 * @returns {*} Whatever Writable.prototype.end returns.
 */
CountStream.prototype.end = function(chunk, encoding, cb) {
    var result = Writable.prototype.end.apply(this, arguments);
    this.emit('total', this.count);
    return result;
};

使用流的代码:

// Example: count how often "book" occurs in the body of an HTTP response.
var CountStream = require('./countstream');
var countStream = new CountStream('book');
var http = require('http');

// Log a failure instead of letting an unhandled 'error' event crash the
// process.
function reportError(err) {
    console.error('Request failed:', err.message);
}

// Register the listener before any data can flow.
countStream.on('total', function(count) {
    console.log('Total matches:', count);
});

http.get('http://www.manning.com', function(res) {
    // pipe() does not forward source errors to the destination, so they are
    // handled here explicitly.
    res.on('error', reportError);
    // pipe() calls countStream.end() once the response has been read
    // completely; that call is what emits 'total'.
    res.pipe(countStream);
}).on('error', reportError);

如果匹配在两个数据块中中断,是不是可能丢失一些匹配?

例如,第一块数据包含 'This a bo',另一块包含 'ok of mine.'。两个数据块各自都不包含 'book',但合并后的完整数据中包含 'book'。

找到所有匹配项的最佳解决方案是什么?

1 个答案:

答案 0 :(得分:1)

所以,正如我在评论中解释的那样,如果你知道正则表达式所能匹配的字符串的最大长度(关于如何计算最大长度,请参阅 https://stackoverflow.com/a/31173778/4114922 上的一个很好的回答),就可以缓存前一个块的末尾部分,并将其拼接到新块之前再进行匹配。需要注意的是,已经完整计数过的匹配不应再留在缓存中,否则会被重复计数。通过这种方法,我认为不会漏掉任何匹配。

// Base class that the stream defined in this module extends.
var Writable = require('stream').Writable;
var util = require('util');
// CountStream is a function declaration further down in this file, so it is
// hoisted and can be referenced here before its definition appears.
module.exports = CountStream;
// Link CountStream.prototype to Writable.prototype so that instances inherit
// the Writable API.
util.inherits(CountStream, Writable);

/**
 * Writable stream that counts case-insensitive occurrences of a pattern in
 * the data written to it, including occurrences that straddle a chunk
 * boundary, and reports the result through a 'total' event.
 *
 * @param {string} matchText - Pattern source handed to `new RegExp`; regular
 *     expression metacharacters in it are interpreted, not escaped.
 * @param {number} [maxPatternLength] - Length of the longest string the
 *     pattern can match. When it is omitted or not a non-negative number,
 *     the length of the pattern source is used, which is exact only for
 *     plain literal text.
 * @param {Object} [options] - Options forwarded to the Writable constructor.
 */
function CountStream(matchText, maxPatternLength, options) {
    Writable.call(this, options);
    this.count = 0;
    this.matcher = new RegExp(matchText, 'ig');

    // Unscanned-for-boundary tail of the text seen so far; set by _write.
    this.previousCache = undefined;

    var hasValidLength = typeof maxPatternLength === 'number' &&
        maxPatternLength >= 0;
    this.maxPatternLength = hasValidLength ?
        maxPatternLength :
        this.matcher.source.length;
}

/**
 * Counts the matches in the cached tail plus the new chunk, then caches a
 * new tail for the next call.
 *
 * The tail never contains text that was already part of a counted match and
 * is at most maxPatternLength - 1 characters long: that is enough to hold
 * the beginning of a match completed by the next chunk, while a match that
 * was already counted can never be counted a second time.
 *
 * NOTE: each chunk is decoded independently with toString(); a multi-byte
 * character split across two chunks is not reassembled here.
 *
 * @param {Buffer|string} chunk - Data to search; converted with toString().
 * @param {string} encoding - Unused.
 * @param {Function} cb - Invoked synchronously once the chunk is processed.
 */
CountStream.prototype._write = function(chunk, encoding, cb) {
    var text = (this.previousCache || '') + chunk.toString();
    var matcher = this.matcher;
    var lastMatchEnd = 0;
    var match;

    // exec() on a global RegExp is stateful; always start from the beginning.
    matcher.lastIndex = 0;
    while ((match = matcher.exec(text)) !== null) {
        this.count += 1;
        if (match[0].length === 0) {
            // An empty match does not advance lastIndex; step manually so
            // the loop terminates.
            matcher.lastIndex += 1;
        } else {
            lastMatchEnd = match.index + match[0].length;
        }
    }

    // A missing or invalid maxPatternLength keeps no tail instead of caching
    // (and recounting) the entire text.
    var keep = this.maxPatternLength > 1 ? this.maxPatternLength - 1 : 0;
    var tailStart = Math.max(lastMatchEnd, text.length - keep);
    this.previousCache = text.substring(tailStart);

    cb();
};

/**
 * Ends the stream and emits 'total' with the final count.
 *
 * Delegates to Writable.prototype.end first so that an optional final chunk
 * is written and the stream really finishes ('finish' fires, the optional
 * callback runs). Because _write completes synchronously, the count is final
 * by the time 'total' is emitted.
 *
 * @param {Buffer|string} [chunk] - Optional last piece of data.
 * @param {string} [encoding] - Encoding of `chunk` when it is a string.
 * @param {Function} [cb] - Optional callback for when the stream finishes.
 * @returns {*} Whatever Writable.prototype.end returns.
 */
CountStream.prototype.end = function(chunk, encoding, cb) {
    var result = Writable.prototype.end.apply(this, arguments);
    this.emit('total', this.count);
    return result;
};