使用流逐行读取文本文件

时间:2019-01-10 14:45:11

标签: javascript node.js

我需要逐行读取文本文件,执行一些操作,然后在输出文件上写入。

由于文件很大(可能超过100MB),建议使用流来提高性能。

我编写了以下函数来读取文件,但是块的大小是恒定的,因此它并不总是包含一行或精确的多行。

/**
 * Streams `inputFilename` and writes one output line per received chunk to
 * `outputFilename`.
 *
 * NOTE: the "data" handler receives raw stream chunks. Chunk boundaries are
 * not aligned with line breaks, so a chunk may hold a partial line or several
 * lines.
 *
 * @param {string} inputFilename - Path of the text file to read (as UTF-8).
 * @param {string} outputFilename - Path of the file to write.
 * @returns {Promise<void>} Resolves once the output stream has emitted
 *   "finish"; rejects with the underlying stream error if reading or writing
 *   fails.
 */
function loadLogFileInfo(inputFilename, outputFilename) {
    return new Promise((resolve, reject) => {
        const inputStream = fs.createReadStream(inputFilename, "utf8");
        const outputStream = fs.createWriteStream(outputFilename);

        // A failure on either side aborts the transfer; the original error is
        // passed on so the caller can see why the promise was rejected.
        const fail = (err) => {
            inputStream.destroy();
            outputStream.destroy();
            reject(err);
        };

        inputStream.on("error", fail);
        outputStream.on("error", fail);

        // Resolve only after the output stream reports "finish", not as soon
        // as the input ends, so callers can read the output file right away.
        outputStream.on("finish", () => resolve());
        inputStream.on("end", () => outputStream.end());

        inputStream.on("data", (chunk) => {
            let data = "";
            // Compute some data on the row and put in data variable
            outputStream.write(`${data}\n`);
        });
    });
}

我还找到了另一个相关的问题,它解决的是二进制文件的情况;但由于这里处理的是文本文件,也许会有更简单的解决方案。

2 个答案:

答案 0 :(得分:1)

您可以使用split模块(可通过NPM安装)很好地解决这个问题,它的源码可以在GitHub上查看。以下示例读取您的输入文件,并将其逐行写入输出流。

const split = require('split');

/**
 * Copies `inputFilename` to `outputFilename` line by line, using the `split`
 * stream to turn raw file chunks into individual lines.
 *
 * @param {string} inputFilename - Path of the text file to read (as UTF-8).
 * @param {string} outputFilename - Path of the file to write.
 * @returns {Promise<void>} Resolves once the output stream has emitted
 *   "finish"; rejects with the underlying stream error if reading, splitting
 *   or writing fails.
 */
function loadLogFileInfo(inputFilename, outputFilename) {
  return new Promise((resolve, reject) => {
    const fileStream = fs.createReadStream(inputFilename, "utf8");
    const lineStream = fileStream.pipe(split());
    const outputStream = fs.createWriteStream(outputFilename);

    // A failure anywhere in the chain aborts the transfer; the original error
    // is passed on so the caller can see why the promise was rejected.
    const fail = (err) => {
      fileStream.destroy();
      outputStream.destroy();
      reject(err);
    };

    // pipe() does not forward "error" events downstream, so the file stream
    // needs its own handler; otherwise e.g. a missing input file would raise
    // an unhandled "error" event instead of rejecting the promise.
    fileStream.on("error", fail);
    lineStream.on("error", fail);
    outputStream.on("error", fail);

    // Resolve only after the output stream reports "finish", not as soon as
    // the input ends, so callers can read the output file right away.
    outputStream.on("finish", () => resolve());
    lineStream.on("end", () => outputStream.end());

    lineStream.on("data", (line) => {
      outputStream.write(`${line}\n`);
    });
  });
}

答案 1 :(得分:0)

由于这个问题当时还没有任何答案或评论,我在此发布自己的解决方案,其灵感来自我在提问中提到的那个关于二进制文件的问题。

/**
 * Reads `inputFilename` as a stream, splits it into rows on "\n", passes each
 * row through `processRow` and writes the result, followed by "\n", to
 * `outputFilename`.
 *
 * Text after the last "\n" is buffered between chunks, so rows that straddle
 * a chunk boundary are reassembled. A non-empty final row without a trailing
 * newline is processed when the input ends.
 *
 * @param {string} inputFilename - Path of the text file to read (as UTF-8).
 * @param {string} outputFilename - Path of the file to write.
 * @param {(row: string) => string} [processRow] - Computes the output text
 *   for one input row (without its line terminator). Defaults to returning
 *   the row unchanged.
 * @returns {Promise<void>} Resolves once the output stream has emitted
 *   "finish"; rejects with the stream error, or with whatever `processRow`
 *   throws.
 */
function loadLogFileInfo(inputFilename, outputFilename, processRow = (row) => row) {
  return new Promise((resolve, reject) => {
    const inputStream = fs.createReadStream(inputFilename, "utf8");
    const outputStream = fs.createWriteStream(outputFilename);
    // Tail of the previous chunk that has not been terminated by "\n" yet.
    let pending = "";

    // A failure on either side (or in processRow) aborts the transfer; the
    // original error is passed on so the caller can see what went wrong.
    const fail = (err) => {
      inputStream.destroy();
      outputStream.destroy();
      reject(err);
    };

    inputStream.on("error", fail);
    outputStream.on("error", fail);

    // Resolve only after the output stream reports "finish", not as soon as
    // the input ends, so callers can read the output file right away.
    outputStream.on("finish", () => resolve());

    inputStream.on("data", (chunk) => {
      const rows = (pending + chunk).split("\n");
      // The last piece is either "" (chunk ended on "\n") or a partial row.
      pending = rows.pop();
      if (rows.length === 0) {
        return;
      }

      let output;
      try {
        // compute data on each complete row
        output = rows.map((row) => `${processRow(row)}\n`).join("");
      } catch (err) {
        fail(err);
        return;
      }

      // Respect backpressure so large inputs are not buffered in memory.
      if (!outputStream.write(output)) {
        inputStream.pause();
        outputStream.once("drain", () => inputStream.resume());
      }
    });

    inputStream.on("end", () => {
      try {
        // Flush a final row that was not terminated by "\n".
        if (pending !== "") {
          outputStream.write(`${processRow(pending)}\n`);
        }
      } catch (err) {
        fail(err);
        return;
      }
      outputStream.end();
    });
  });
}