Node streams: pull n lines, transform, and continue

Date: 2016-12-15 18:32:42

Tags: node.js parsing stream transform fs

In my Node.js code I want to read a large file, process it in chunks of n lines, use the data, and then move on to the next n lines.

I have tried several modules (fs, event-stream and node-etl being the main ones), but I cannot get the behavior I want.

The best I have managed (code below) does not wait for the transform to finish before processing the next lines.

Here is my code:

const fs = require('fs');
const es = require('event-stream');
const parse = require('csv-parse');
const stringify = require('csv-stringify');
const etl = require('etl');

exports.toDatabase = (file, done) => {

  // The file contains six lines with two values each (e.g. aa;bb for the first line)
  let input = fs.createReadStream(file);

  input
    .pipe(es.split())
    .pipe(etl.collect(2))
    .pipe(es.map((data, nextMap) => {
      // I'd like to process all this code before continuing to read my stream
      let date = Date.now();
      console.log('map data ' + date);
      console.log(data);

      parse(data[0], {
        delimiter: ';'
      }, (err, output) => {
        console.log('Row done ' + date);
        // Treatment to do would be to insert in database the output
        console.log(output);
        console.log('------ ' + date);
        return nextMap();
      });

    }));

};

But the output shows that the next map starts before the first call has finished:

TESTING !!
map data 1481824486765
[ 'aa;zz', 'bb;cc' ]
map data 1481824486771
[ 'dd;ee', 'ff;gg' ]
Row done 1481824486765
[ [ 'aa', 'zz' ] ]
------ 1481824486765
Row done 1481824486771
[ [ 'dd', 'ee' ] ]
------ 1481824486771
map data 1481824486785
[ 'hh;ii', '' ]
Row done 1481824486785
[ [ 'hh', 'ii' ] ]
------ 1481824486785
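
For context, the timestamps show that the mapper is invoked for the second batch while the first parse callback is still pending. The sequential behavior being asked for corresponds to a Transform stream in object mode that defers its callback until the asynchronous work is done, since the next chunk is not delivered until the callback runs. A minimal sketch of that idea (not from the question; insertIntoDatabase is a hypothetical async helper):

const { Transform } = require('stream');

// Each batch of lines arrives as one object (e.g. from split + collect).
// _transform is not called for the next batch until callback() runs,
// which is exactly the "wait before reading more" behavior wanted here.
const batchProcessor = new Transform({
  objectMode: true,
  transform(batch, encoding, callback) {
    // insertIntoDatabase is a hypothetical err-first async helper.
    insertIntoDatabase(batch, (err) => {
      callback(err);
    });
  }
});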

1 Answer:

Answer 0 (score: 0)

Not sure why you want to regroup the split stream into chunks, but here you go:

var through = require('through2');
var split = require('split');
var fs = require('fs');
var handler = (function(len) {
  var buff = []; // accumulates lines until a batch of `len` is full
  var p = 0;     // total lines seen, used by the demo kill switch below
  return through.obj(function(chunk, enc, cb) {
    buff.push(chunk);
    p++;
    if (buff.length === len) {
      console.log('send--------------');
      this.push(buff); // emit the batch downstream as one object
      buff = [];
    }
    if (p > 25) {
      // Demo kill switch: abort the stream after 25 lines.
      this.emit('error', new Error('kill the stream'));
    }
    cb();
  }, function (cb) {
    this.push(buff); // flush the leftover lines (fewer than len)
    buff = [];
    cb();
  });
})(4);

var origin = fs.createReadStream('tomate.csv');

origin
.pipe(split())
.pipe(handler)
.pipe(through.obj(function(chunk, enc, cb){
  console.log('process: %j', chunk);
  cb();
}));

handler.on('error', function () {
  origin.close();
  // You still need to unpipe everything; the mississippi package helps with that.
});
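
If the end goal is the original one (insert each batch into a database before reading any further), the final through.obj stage is the place to do it: defer cb until the asynchronous call completes, and through2 will not pass the next batch along until then. A minimal sketch of that idea, where db.insert is a placeholder for whatever database client you actually use:

origin
  .pipe(split())
  .pipe(handler)
  .pipe(through.obj(function(batch, enc, cb) {
    // `db.insert` is hypothetical; calling cb only inside its
    // callback is what serializes the batches.
    db.insert(batch, function (err) {
      cb(err);
    });
  }));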