Node.js - 逐行读取文件,对每一行执行异步操作,完成后再恢复读取

时间:2015-01-18 12:49:50

标签: node.js

我正在尝试逐行读取文件,对每一行执行一些带回调的操作,并在该操作完成后再恢复读取下一行。例如:

const fs = require('fs');
const readline = require('readline');
const stream = require('stream');

// Lines are read from the phrases file; the output side is a bare stream object.
const instream = fs.createReadStream('./phrases.txt');
const outstream = new stream();
const rl = readline.createInterface(instream, outstream);

// Logs and lets the interface continue; scheduled two seconds after each line.
const resumeReading = () => {
  console.log('resuming');
  rl.resume();
};

// For every line: pause the interface, then resume it after a 2 s timer.
// NOTE(review): the Node.js readline docs state that rl.pause() does not
// immediately stop 'line' events that are already buffered, which would
// explain a burst of handler calls after the first delay - confirm.
rl.on('line', (line) => {
  rl.pause();
  setTimeout(resumeReading, 2000);
});

我的理解是,上面的例子应该读取一行,等待2秒,执行console.log,然后再继续读取下一行。但实际情况是:它只在最开始等待2秒,随后一口气输出大量的console.log。

6 个答案:

答案 0 :(得分:13)

Line by Line模块可帮助您逐行读取大型文本文件,而无需将文件缓冲到内存中。

您可以异步处理这些行。这是提供的示例:

const LineByLineReader = require('line-by-line');
const lr = new LineByLineReader('big_file.txt');

lr.on('error', (err) => {
    // `err` holds the error object.
});

lr.on('line', (line) => {
    // Stop the reader from emitting further lines while this one is handled.
    lr.pause();

    // Stand-in for asynchronous per-line work: once it completes,
    // let the reader continue emitting lines.
    const continueReading = () => {
        lr.resume();
    };
    setTimeout(continueReading, 100);
});

lr.on('end', () => {
    // Every line has been read and the file is closed at this point.
});

答案 1 :(得分:2)

/**
 * Creates an event emitter that streams `fileName` and emits one "line"
 * event per "\n"-terminated line, without loading the whole file in memory.
 *
 * Events emitted on the returned object:
 *   - "line"  (Buffer): the bytes of one line without the trailing "\n".
 *                       A "\r" preceding the "\n" is NOT stripped.
 *   - "end"   ():       the stream finished and every line has been emitted.
 *   - "error" (Error):  forwarded from the underlying read stream.
 *
 * @param {string} fileName - Path handed to fs.createReadStream.
 * @returns {EventEmitter} Emitter for the events listed above.
 */
function createLineReader(fileName){
    var EM = require("events").EventEmitter
    var ev = new EM()
    var stream = require("fs").createReadStream(fileName)
    // Bytes after the last "\n" of the previous chunk (an unfinished line).
    var remainder = null;

    stream.on("data",function(data){
        // The carried-over bytes are known to contain no "\n", so the
        // newline search can skip them.
        var scanFrom = 0;
        if(remainder != null){
            scanFrom = remainder.length;
            // Buffer.concat replaces the deprecated `new Buffer(size)` + copy.
            data = Buffer.concat([remainder, data]);
        }

        var start = 0;
        var newlineAt = data.indexOf(10, scanFrom); // 10 === "\n"
        while(newlineAt !== -1){
            ev.emit("line", data.subarray(start, newlineAt))
            start = newlineAt + 1;
            newlineAt = data.indexOf(10, start);
        }

        // Keep whatever follows the last newline for the next chunk.
        remainder = start < data.length ? data.subarray(start) : null;
    })

    stream.on("end",function(){
        // A final line without a trailing "\n" is still delivered.
        if(null != remainder) ev.emit("line", remainder)
        remainder = null;
        ev.emit("end")
    })

    // Surface read failures (e.g. a missing file) on the returned emitter;
    // the stream itself is not reachable by callers.
    stream.on("error",function(err){
        ev.emit("error", err)
    })

    return ev
}


//---------main---------------
// Path of the file to read, taken from the first command-line argument.
// Declared with `var` so no implicit globals are created (assigning to an
// undeclared name throws in strict mode and in ES modules).
var fileName = process.argv[2]

var lineReader = createLineReader(fileName)
lineReader.on("line",function(line){
    // Each line arrives as a Buffer; convert it to text before printing.
    console.log(line.toString())
})

答案 2 :(得分:1)

有一个非常好用的逐行读取模块 line-reader: https://github.com/nickewing/line-reader

简单代码:

 const lineReader = require('line-reader');

 // Invoked once per line of file.txt.
 const handleLine = (line, last) => {
   // Process the current line here.
   console.log(line);
   if (last) {
     // `last` signals that this was the final line.
   }
 };

 lineReader.eachLine('file.txt', handleLine);

它还提供了“Java 风格”的接口,以便进行更精细的控制:

lineReader.open('file.txt', (reader) => {
  // Nothing left to read: return without requesting a line.
  if (!reader.hasNextLine()) {
    return;
  }

  // Request a single line and print it once it is delivered.
  reader.nextLine((line) => {
    console.log(line);
  });
});

另一个很酷的解决方案:

const fs = require('fs');
const sleep = require('sleep');
const readline = require('readline');

// Read phrases.txt through readline, with terminal handling turned off.
const rd = readline.createInterface({
    input: fs.createReadStream('phrases.txt'),
    output: process.stdout,
    terminal: false
});

// Prints a separator and the line, then waits before returning.
// NOTE(review): sleep.sleep(2) presumably blocks the whole process
// synchronously for 2 seconds - confirm against the `sleep` package docs.
const printThenWait = (line) => {
    console.log('-------');
    console.log(line);
    sleep.sleep(2);
};

rd.on('line', printThenWait);

答案 3 :(得分:1)

我建议将stdio用于此类操作,因为输入流会自动暂停和恢复,并且您无需担心系统资源。仅需几个MB的内存,您就可以读取非常大的文件:

此示例每2秒打印一行:

$ node myprogram.js < file.txt
import { read } from 'stdio';

// Per-line handler: print the line, then wait two seconds before returning.
// `sleep` is not defined in this snippet; it must be provided separately.
const onLine = async (line) => {
  console.log(line);
  await sleep(2000);
};

// Hand the handler to stdio's read(); log once its promise resolves.
const reading = read(onLine);
reading.then(() => {
  console.log('finished');
});

请注意,这里我用异步的sleep来代表任意异步任务。Node.js默认并不提供该函数,但可以像下面这样实现:

/**
 * Returns a promise that resolves (with no value) once a timer of
 * `delay` milliseconds fires.
 *
 * @param {number} delay - Timeout passed to setTimeout.
 * @returns {Promise<void>}
 */
const sleep = (delay) => {
  const waitForTimer = (resolve) => {
    setTimeout(resolve, delay);
  };
  return new Promise(waitForTimer);
};

答案 4 :(得分:0)

下面是一个用TypeScript编写、基于行读取器(line reader)的简单解决方案,可在Node.js 8中运行:

<div #myelement2 [hidden]="!isDivShown">Element_2</div>

请注意,它不会在执行之前缓冲整个文件,因此适合处理大型文本文件。

答案 5 :(得分:-1)

const fs = require('fs');
const readline = require('readline');

// Feed sample.txt into a readline interface.
const input = fs.createReadStream('sample.txt');
const rl = readline.createInterface({ input });

// Print each line as it is emitted.
const printLine = (line) => {
  console.log(`Line from file: ${line}`);
};

rl.on('line', printLine);

来源:https://nodejs.org/api/readline.html#readline_example_read_file_stream_line_by_line