I have a 1.4 GB CSV file that I want to read line by line, parsing each line as I go. After parsing a line, I want to push it into the stream and write the output as a TSV file. I thought the code below worked, but it just appends each row to the end of the previous one instead of adding a newline as I expected. I also tried adding .pipe(split2()) on the line before .pipe(writeStream) so the data would be split before being written, but that just froze the application.

Has anyone successfully used this kind of read/write process in Node?
var fs = require('fs'),
    _ = require('lodash'),
    split2 = require('split2'),
    through2 = require('through2');

fs.createReadStream('input_file_name.csv')
    .pipe(split2())
    .pipe(through2.obj(function (chunk, enc, callback) {
        // Process the CSV row
        var row = _.zipObject(['header1', 'header2', 'header3'], chunk.toString().split(','));
        this.push(processRow(row).join('\t')); // does an action to each row
        callback();
    }))
    .pipe(fs.createWriteStream('output_file_name.tsv'));
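For reference, the run-together output comes from pushing each row without a trailing newline. A minimal sketch of the same split2/through2 pipeline with the newline appended (assuming processRow returns an array of field values, and that no field itself contains commas or embedded newlines):

var fs = require('fs'),
    _ = require('lodash'),
    split2 = require('split2'),
    through2 = require('through2');

fs.createReadStream('input_file_name.csv')
    .pipe(split2())                      // one chunk per input line
    .pipe(through2.obj(function (chunk, enc, callback) {
        var row = _.zipObject(['header1', 'header2', 'header3'], chunk.toString().split(','));
        // push the processed row with an explicit trailing newline
        this.push(processRow(row).join('\t') + '\n');
        callback();
    }))
    .pipe(fs.createWriteStream('output_file_name.tsv'));

The answer below takes the more robust route of swapping the naive split for a real CSV parser.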
Answer 0 (score: 0)
Realized what I was missing was a proper CSV parser rather than simply splitting the input, and that a '\n' needed to be appended to the end of each data string.
var fs = require('fs'),
    _ = require('lodash'),
    parse = require('csv-parse'),
    transform = require('stream-transform');

// Parse incoming CSV data into one array of fields per record
var parser = parse();

// Map each record to an object, process it, and emit a tab-separated
// line with an explicit trailing newline
var transformer = transform(function (record, callback) {
    var row = _.zipObject(['header1', 'header2', 'header3'], record);
    callback(null, processRow(row).join('\t') + '\n');
}, {parallel: 10}); // handle up to 10 records concurrently

fs.createReadStream('input_file_name.csv')
    .pipe(parser)
    .pipe(transformer)
    .pipe(fs.createWriteStream('output_file_name.tsv'));
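As a follow-up note (not part of the original answer): for a 1.4 GB input it can also help to wire the same streams together with stream.pipeline, which forwards an error from any stage to a single callback and tears the chain down on failure. A sketch under the same assumptions (csv-parse, stream-transform, and the user-defined processRow):

var fs = require('fs'),
    _ = require('lodash'),
    parse = require('csv-parse'),
    transform = require('stream-transform'),
    pipeline = require('stream').pipeline; // available in Node 10+

var parser = parse();
var transformer = transform(function (record, callback) {
    var row = _.zipObject(['header1', 'header2', 'header3'], record);
    callback(null, processRow(row).join('\t') + '\n');
}, {parallel: 10});

pipeline(
    fs.createReadStream('input_file_name.csv'),
    parser,
    transformer,
    fs.createWriteStream('output_file_name.tsv'),
    function (err) {
        // Called once, with the first error from any stream, or null on success
        if (err) {
            console.error('Pipeline failed:', err);
        } else {
            console.log('Done writing output_file_name.tsv');
        }
    }
);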