我有一个 Node.js 应用程序,它利用流(stream)来读取和处理来自数据库的数据。
我有一个读取器流,通过ZeroMQ发出请求,并且当它接收数据时,它将数据推送到管道中的下一个流。
第二个流将JSON数据写入文件,然后传递数据。
最终流将JSON数据转换为CSV,然后写出CSV文件。
我注意到,当我从数据库收到“大量”数据(超过 10k 行、约 2MB 未压缩的原始数据)时,此过程需要相当长的时间(约 20 秒)。在这 20 秒内,其他请求会被饿死,无法完成。
这种情况正常吗?有没有办法让出线程,使它在读写数据流的同时还能处理其他工作?或者有没有更好的方式来处理这些文件 I/O?
编辑:代码如下
/**
 * Object-mode Readable that sends one request through DBRequest and emits
 * every entry of `json.Body.messages` from the reply as a JSON string,
 * followed by end-of-stream.
 *
 * @param {*} msg - Request payload; stored on `this.request` and passed
 *     unchanged to `DBRequest.send()`.
 */
function FirstStream(msg) {
  stream.Readable.call(this, { objectMode: true });
  this.request = msg;

  // The stream machinery is allowed to call _read() repeatedly; the request
  // must be issued only once or rows would be duplicated / pushed after EOF.
  var requested = false;

  this._read = function() {
    if (requested) {
      return;
    }
    requested = true;

    var self = this;
    DBRequest.send(self.request).then(function(json) {
      json.Body.messages.forEach(function(item) {
        self.push(JSON.stringify(item));
      });
      // Signal end-of-stream once every message has been queued.
      self.push(null);
    }).then(null, function(err) {
      // Covers both a rejected request and a malformed reply (e.g. missing
      // Body.messages). Without this the stream would never end or fail and
      // everything piped behind it would wait forever.
      self.emit('error', err);
    });
  };
}
util.inherits(FirstStream, stream.Readable);
/**
 * Object-mode pass-through stream that forwards every chunk unchanged while
 * collecting the parsed chunks, and on flush writes them to `filename` as a
 * single JSON array. Once the serialized array would exceed `maxFileSize`
 * bytes, collection stops and no file is written.
 *
 * @param {string} filename - Path of the JSON file to write.
 * @param {number} [maxFileSize] - Size limit in bytes; falls back to the
 *     file-level `MB` constant when falsy.
 */
function SecondStream(filename, maxFileSize) {
  stream.Transform.call(this, { objectMode: true });
  this.filename = filename;
  this.jsonArray = [];
  this.buf = '';
  this.bufferTooBig = false;
  this.id = 0;
  this.maxFileSize = maxFileSize || MB;
  // Running UTF-8 size of JSON.stringify(this.jsonArray); starts at 2 for the
  // enclosing "[" and "]".
  this.byteCount = 2;

  // Collects JSON data; if it grows too large, the JSON file is skipped.
  //
  // NOTE(review): this overrides _write on a Transform instead of implementing
  // _transform, so readable-side back-pressure is not applied. Left as-is on
  // purpose: switching would make the stream stall when nothing reads its
  // output — confirm with callers before changing.
  this._write = function(chunk, encoding, done) {
    // Once over the limit there is nothing left to collect.
    if (!this.bufferTooBig) {
      var json;
      try {
        json = JSON.parse(chunk);
      } catch (err) {
        // Report bad input through the stream instead of throwing at the writer.
        return done(err);
      }

      // Track the size incrementally rather than re-serializing the whole
      // array for every chunk, which is quadratic and blocks the event loop.
      // Each element after the first also costs one "," separator.
      var separator = this.jsonArray.length > 0 ? 1 : 0;
      this.byteCount += Buffer.byteLength(JSON.stringify(json)) + separator;
      this.jsonArray.push(json);

      // If the file would exceed the limit, give up on it entirely.
      if (this.byteCount > this.maxFileSize) {
        // Best effort: the unlink result is intentionally ignored (the file
        // may not exist).
        fs.unlink(filename, function(err) { });
        this.jsonArray = [];
        this.buf = '';
        this.byteCount = 2;
        this.bufferTooBig = true;
      }
    }
    // Pass the data on to the next stream.
    this.push(chunk);
    done();
  };

  this._flush = function(done) {
    if (this.bufferTooBig) {
      return done();
    }
    // Serialize exactly once. With no chunks at all an empty file is written.
    this.buf = this.jsonArray.length > 0 ? JSON.stringify(this.jsonArray) : '';
    fs.writeFile(filename, this.buf, function(err) {
      // Throwing here would be an uncaught exception in an async callback;
      // hand the error to the stream so it surfaces as an 'error' event.
      done(err);
    });
  };
}
util.inherits(SecondStream, stream.Transform);
/**
 * Object-mode stream that appends `csvMessage` to a file write stream opened
 * on `filename` for every incoming chunk, and also pushes it downstream.
 *
 * @param {string} filename - Path handed to fs.createWriteStream().
 */
function ThirdStream(filename) {
  stream.Transform.call(this, { objectMode: true });
  this.fileStream = fs.createWriteStream(filename);

  // NOTE(review): this overrides _write on a Transform instead of implementing
  // _transform, so readable-side back-pressure is not applied. Left as-is on
  // purpose: switching would make the stream stall when nothing reads its
  // output — confirm with callers before changing.
  this._write = function(chunk, encoding, done) {
    // NOTE(review): csvMessage is not defined anywhere in this block and
    // `chunk` is unused; presumably the JSON-to-CSV conversion of `chunk`
    // was elided here — confirm where csvMessage comes from.
    var accepted = this.fileStream.write(csvMessage);
    this.push(csvMessage);
    if (accepted) {
      done();
    } else {
      // The file stream's buffer is full; wait for it to drain before
      // accepting more input so rows do not pile up in memory.
      this.fileStream.once('drain', function() {
        done();
      });
    }
  };

  this._flush = function(done) {
    var file = this.fileStream;
    // Report completion only once the file stream has finished (or failed);
    // calling done() right after end() would signal it too early.
    file.once('error', done);
    file.once('finish', function() {
      done();
    });
    file.end();
  };
}
util.inherits(ThirdStream, stream.Transform);
// USE CASE
// Build the pipeline: reader -> JSON file stage -> CSV file stage.
// backendStream refers to the last stage (the ThirdStream instance).
var backendStream = new FirstStream(request)
  .pipe(new SecondStream(jsonFileName))
  .pipe(new ThirdStream(csvFileName));

// Runs when the last stage emits 'finish'.
backendStream.on('finish', function() {
  /* write response back to client */
});