在第二个分隔符处分割文件流

时间:2018-10-15 10:08:03

标签: javascript node.js file stream pipeline

我目前正在尝试以流的方式读取一个文件,文件内容大致如下:

Header
Title
Header
Title
...

我处理它的方式是使用这样的流:

// Stream the file, split it into lines, and hand every line to parseLine.
fs.createReadStream(filePath, streamOpts)
  .pipe(split())
  .on('data', (data) => parseLine(data));

但是,这会将文件分割成每一行,因此data只是下一行的单个字符串。例如,

// Handler for a single line emitted by the stream; logs the line it receives.
const parseLine = (line) => { console.log(line); };
> "Header"

我想要的是将其拆分并以某种方式将data更改为一个对象,例如

> "{ header: 'Header', title: 'Title' }"

有没有办法做到这一点?我猜想在pipe期间需要得到两行而不是一行,但是我不知道怎么做。


我当前的方法如下:

const fs = require('fs');
const split = require('split');

// Parser state shared across successive calls to parseFileLine.
let isHeaderLine = true; // whether the next line is expected to be a header
let currentItem = {}; // entry being assembled from the current header/title pair
const items = []; // completed { header, title } entries, in the order they were read

/**
 * Consume one line from the stream.
 *
 * Lines are expected to alternate header, title, header, title, ...
 * A header line starts a new entry; the following title line completes it
 * and appends the entry to `items`.
 *
 * @param {string} line - The next line of the file.
 */
function parseFileLine(line) {
  if (isHeaderLine) {
    currentItem.header = line;
  } else {
    currentItem.title = line;
    items.push(currentItem);
    // Start a fresh object so the entry just stored is not overwritten
    // by the next header/title pair.
    currentItem = {};
  }

  // Header and title lines alternate, so flip the expectation after each line.
  isHeaderLine = !isHeaderLine;
}

/**
 * Read the file as a UTF-8 stream, break it into lines, and pass every line
 * to parseFileLine.
 *
 * @param {string} filePath - Path of the file to read.
 */
function parseFileLines(filePath) {
  const streamOpts = {
    encoding: 'utf8',
  };

  fs.createReadStream(filePath, streamOpts)
    .pipe(split())
    // The per-line handler defined in this file is parseFileLine.
    .on('data', parseFileLine);
}

但是我不认为这是最好的方法。有没有更好的方法来一次拆分并传递两行,最好是作为对象或数组?

1 个答案:

答案 0 :(得分:0)

最终使用Transform解决了这个问题:

/**
 * Split `buffer` in two at the `nth` occurrence of a delimiter.
 *
 * @param {Buffer|string} buffer - Data to split.
 * @param {Buffer|string} splitBuffer - Delimiter to look for.
 * @param {number} [nth=1] - 1-based occurrence of the delimiter to split at.
 * @returns {Array<Buffer|string>} `[head, tail]` with the delimiter removed,
 *   or `[buffer]` when the delimiter is empty or occurs fewer than `nth` times.
 * @throws {RangeError} When `nth` is not a positive integer.
 */
function bufferSplitOnce(buffer, splitBuffer, nth = 1) {
  if (!Number.isInteger(nth) || nth < 1) {
    throw new RangeError(`nth must be a positive integer, got ${nth}`);
  }

  // Buffers are indexed in bytes, strings in UTF-16 code units, so measure the
  // delimiter in the same unit that indexOf/slice will use on `buffer`.
  const delimiterLength = typeof buffer === 'string'
    ? splitBuffer.length
    : Buffer.byteLength(splitBuffer);

  // An empty delimiter matches everywhere and would never make progress.
  if (delimiterLength === 0) {
    return [buffer];
  }

  let splitIdx = -1;
  let searchFrom = 0;
  for (let occurrence = 0; occurrence < nth; occurrence += 1) {
    splitIdx = buffer.indexOf(splitBuffer, searchFrom);
    if (splitIdx === -1) {
      return [buffer];
    }
    searchFrom = splitIdx + delimiterLength;
  }

  // `searchFrom` now points just past the delimiter that was matched last.
  return [buffer.slice(0, splitIdx), buffer.slice(searchFrom)];
}

/**
 * Split `buffer` on at most the first `n` occurrences of a delimiter by
 * repeatedly applying bufferSplitOnce to the not-yet-split remainder.
 *
 * @param {Buffer|string} buffer - Data to split.
 * @param {Buffer|string} splitBuffer - Delimiter passed through to bufferSplitOnce.
 * @param {number} n - Maximum number of splits to perform.
 * @returns {Array<Buffer|string>} Up to `n + 1` parts; the last part is the
 *   remainder that was not split any further.
 */
function bufferSplitN(buffer, splitBuffer, n) {
  const result = [buffer];

  for (let i = 0; i < n; i += 1) {
    const parts = bufferSplitOnce(result.pop(), splitBuffer);
    result.push(...parts);

    // A single part means no split happened, so further attempts on the
    // same remainder cannot change the result.
    if (parts.length === 1) {
      break;
    }
  }

  return result;
}


/**
 * Transform stream that groups incoming data into two-line entries and emits
 * one `{ code, value }` object per entry (first line -> `code`, second line
 * -> `value`).
 *
 * Incoming chunks can end in the middle of an entry, so data that does not
 * yet contain two complete lines is kept in `this.pendingTail` and prepended
 * to the next chunk. Whatever is still pending when the input ends is emitted
 * from `flush`.
 *
 * Relies on bufferSplitN(data, '\n', 2) returning `[first, second, rest]`
 * when `data` holds at least two newlines, and fewer parts otherwise.
 */
const streamToEntry = new Transform({
  readableObjectMode: true,
  writableObjectMode: true,

  transform(chunk, encoding, callback) {
    let unparsed = chunk;

    if (this.pendingTail !== undefined) {
      // Re-attach the incomplete entry left over from the previous chunk,
      // keeping the same data type (string or Buffer) as the incoming chunk.
      unparsed = typeof chunk === 'string'
        ? `${this.pendingTail}${chunk}`
        : Buffer.concat([this.pendingTail, chunk]);
      this.pendingTail = undefined;
    }

    // Check the length explicitly: an empty Buffer is still truthy.
    while (unparsed.length > 0) {
      const parts = bufferSplitN(unparsed, '\n', 2);

      if (parts.length < 3) {
        // Fewer than two complete lines: wait for more data.
        this.pendingTail = unparsed;
        break;
      }

      const [code, value, rest] = parts;
      this.push({ code, value });
      unparsed = rest;
    }

    callback();
  },

  flush(callback) {
    // Emit a final entry that was not terminated by a newline.
    if (this.pendingTail !== undefined && this.pendingTail.length > 0) {
      const [code, value] = bufferSplitN(this.pendingTail, '\n', 2);
      this.push({ code, value });
    }
    this.pendingTail = undefined;

    callback();
  },
});

// Feed the file through the entry transform and log each emitted entry.
fs.createReadStream(filePath, streamOpts)
  .pipe(streamToEntry)
  .on('data', (entry) => console.log(entry));