逐行读取gzip流

时间:2016-06-28 10:56:15

标签: node.js gzip zlib

我有一个用 gzip 压缩的文件,想逐行读取其中的内容。

var fs = require('fs')
var zlib = require('zlib')
// NOTE(review): this streaming gunzip transform is created but never used
// anywhere in this snippet.
var gunzip = zlib.createGunzip()
var inp = fs.createReadStream('test.gz')
// Number of successful lineProcessing callbacks so far.
var n = 0

/**
 * Callback passed to zlib.gunzip for every chunk read from the file.
 * On success it increments the counter and prints the decompressed data.
 *
 * @param {Error|null} err  - decompression error, if any
 * @param {Buffer}     data - decompressed bytes for the chunk
 */
var lineProcessing = function (err, data) {
    // Failures are dropped silently: nothing is logged when err is set.
    if (!err) {
        n += 1
        console.log ("line: " + n)
        // Prints the whole decompressed buffer; nothing here splits it on
        // newlines, so it is presumably not a single line.
        console.log (data.toString())
    }
}

inp
  .on('data', function (chunk) {
      // NOTE(review): every raw file chunk is decompressed on its own with the
      // one-shot zlib.gunzip API. Chunks presumably do not line up with
      // complete gzip data, so calls after the first are expected to fail
      // (and the failure is ignored above) -- confirm against the zlib docs.
      zlib.gunzip (chunk, lineProcessing)
  })
  .on('end', function () {
    console.log ('ende');
  });

我想我需要为 zlib.createGunzip 设置一个 chunkSize,使每次只读到下一个 \n 为止。但是这个大小该如何动态确定呢?

3 个答案:

答案 0 :(得分:13)

使用readline可能更容易:

const fs       = require('fs');
const zlib     = require('zlib');
const readline = require('readline');

// Decompress the file on the fly and hand the plain-text stream to readline,
// which takes care of splitting it into lines.
const gunzipped = fs.createReadStream('test.gz').pipe(zlib.createGunzip());
const lineReader = readline.createInterface({ input: gunzipped });

// Running line counter, printed before each line.
let n = 0;
lineReader.on('line', (line) => {
  n += 1;
  console.log(`line: ${n}`);
  console.log(line);
});

答案 1 :(得分:3)

如果几年后仍然有人在研究如何实现这一点,并且想要一种可以配合 async / await 使用的解决方案,下面是我的做法(使用 TypeScript 编写,但您可以直接去掉类型注解)。

import fs from "fs";
import zlib from "zlib";
import readline from "readline";

/**
 * Opens the gzip file at `path` and returns a readline interface over its
 * decompressed contents.
 *
 * @param path - location of the gzip-compressed file
 * @returns a readline interface that can be consumed with `for await`
 */
const line$ = (path: string) => {
    const decompressed = fs.createReadStream(path).pipe(zlib.createGunzip());

    // crlfDelay: Infinity makes "\r\n" always count as a single line break.
    return readline.createInterface({ input: decompressed, crlfDelay: Infinity });
};

/**
 * Example consumer: walks the decompressed file one line at a time.
 * Replace the loop body with the actual per-line work.
 */
const yourFunction = async () => {
    const lines = line$("/path/to/file.txt.gz");

    for await (const line of lines) {
        // do stuff with line
    }
}

答案 2 :(得分:0)

在TypeScript中逐行读取纯文本或gzip文件:

import * as fs from 'fs';
import * as zlib from 'zlib'
import * as readline from 'readline'

/**
 * Opens `path` for line-by-line reading. Files whose name ends in ".gz"
 * (case-insensitive) are decompressed on the fly; anything else is read as-is.
 *
 * @param path - file to read
 * @returns a readline interface that can be consumed with `for await`
 */
function readFile(path: string) {
    const raw: NodeJS.ReadableStream = fs.createReadStream(path)
    const isGzipped = /\.gz$/i.test(path)
    const input: NodeJS.ReadableStream = isGzipped
        ? raw.pipe(zlib.createGunzip())
        : raw

    // crlfDelay: Infinity makes "\r\n" always count as a single line break.
    return readline.createInterface({ input, crlfDelay: Infinity })
}

/**
 * Entry point: prints every line of the compressed `less` man page.
 */
async function main() {
    for await (const text of readFile('/usr/share/man/man1/less.1.gz')) {
        console.log(text)
    }
}

// Report any failure and exit with a non-zero status.
main().catch((failure) => {
    console.error(failure);
    process.exit(1)
})