NodeJs Stream - 内存不足

时间:2015-09-04 01:21:32

标签: node.js out-of-memory

我正在尝试处理一个包含3亿行数据的流。处理到大约500万行时,就会出现致命错误:FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory(进程内存不足)。(具体行数因机器而异,但该错误总会稳定出现。)

您可以运行以下代码来重现这种情况——我无法判断问题是出在我的代码中,还是流本身固有的。我已尝试简化这个过程,但仍然无法解决。

是否有内存限制?我删除了所有其他代码,并简化了这个例子,以确保这不是某种背压(back pressure)问题。

    var Readable = require('stream').Readable;
    var Writable = require('stream').Writable;
    var util = require('util');
    // Total number of items the demo pushes through the pipeline.
    // According to the author's note, using 5000000 here instead completes
    // without running out of memory.
    var tenMillion = 10 * 1000 * 1000;
    // Reporting interval: a progress line is logged every `writeEvery` items
    // (one tenth of the total).
    var writeEvery = tenMillion / 10;

    /*
    * Streamo: an object-mode Readable that produces the value `true` once per
    * _read() call until `max` items have been produced, then ends the stream.
    *
    * max - number of items to produce before pushing null.
    */
    function Streamo(max) {
        var readableOptions = { objectMode: true };
        Readable.call(this, readableOptions);
        // Starts at -1 so that the first _read() call lands on index 0.
        this._currentIndex = -1;
        this._maxIndex = max;
    }

    util.inherits(Streamo, Readable);

    /*
    * Advances the index by one and pushes synchronously, from inside _read()
    * itself: `true` while the index is within range, `null` (end-of-stream)
    * once it is not. A progress line is logged every `writeEvery` items.
    */
    Streamo.prototype._read = function () {
        var index = (this._currentIndex += 1);

        if (index % writeEvery === 0) {
            console.log(index + ' of ' + this._maxIndex);
        }

        var exhausted = index < 0 || index >= this._maxIndex;
        if (!exhausted) {
            this.push(true);
            return;
        }

        console.log("BOOM");
        this.push(null);
    };

    /*
    * Counta: an object-mode Writable that discards every chunk it receives
    * and only keeps a running count of how many it has seen.
    */
    function Counta() {
        // NOTE(review): in object mode highWaterMark is presumably a number of
        // objects rather than bytes - confirm against the stream documentation.
        var writableOptions = { objectMode: true, highWaterMark: (200 * 1024) };
        Writable.call(this, writableOptions);
        this._count = 0;
    }
    util.inherits(Counta, Writable);

    /*
    * Counts the chunk, logs a marker line every `writeEvery` chunks, and
    * acknowledges the write immediately by invoking cb with no arguments.
    */
    Counta.prototype._write = function (chunk, enc, cb) {
        var seen = ++this._count;
        if (seen % writeEvery === 0) {
            console.log('_______________________________' + seen);
        }
        cb();
    };

    /*
    * Returns the number of chunks written so far.
    */
    Counta.prototype.Count = function () {
        return this._count;
    };


    /*
    * Exercise It: build the producer and the counter, connect them with
    * pipe(), and log a line once the counter emits 'finish'.
    */
    var s = new Streamo(tenMillion);
    var c = new Counta();
    // pipe() returns its destination, so the listener below is attached to `c`.
    s.pipe(c).on('finish', function announceFinish() {
        console.log("BOOM BOOM BOOM BOOM BOOM BOOM BOOM BOOM BOOM ");
    });

1 个答案:

答案 0 :(得分:4)

这是当前流实现的一个已知问题(known issue)。

在流的文档和代码中,有多处提到_read()应该是异步的。

因此,如果您在_read()的实现中没有真正执行(异步)I/O,那么您可能需要(至少偶尔)通过setImmediate()把push()的调用推迟到之后执行,以防止调用堆栈变得过深。例如,下面的写法不会崩溃:

/*
 * _read() variant that pushes synchronously for most items, but defers the
 * push through setImmediate() on every `writeEvery`-th item so the call
 * stack does not keep growing.
 *
 * n - size hint passed by the stream machinery; not used here.
 */
Streamo.prototype._read = function (n) {
    var index = (this._currentIndex += 1);
    var atInterval = index % writeEvery === 0;

    if (atInterval) {
        console.log(index + ' of ' + this._maxIndex);
    }

    if (index < 0 || index >= this._maxIndex) {
        console.log("BOOM");
        this.push(null);
        return;
    }

    if (!atInterval) {
        this.push(true);
        return;
    }

    // On interval boundaries, hand the push to setImmediate() instead of
    // calling it from inside this _read() invocation.
    var stream = this;
    setImmediate(function () {
        stream.push(true);
    });
};