我有一个 gzip 压缩文件,想要逐行读取它的内容。
var fs = require('fs')
var zlib = require('zlib')
// Question snippet: attempts to print a gzip file line by line by inflating
// each raw file chunk independently.
// NOTE(review): this streaming gunzip instance is never referenced again in
// this snippet; the one-shot zlib.gunzip() call below is used instead.
var gunzip = zlib.createGunzip()
// Read stream over the compressed file; its 'data' chunks are raw gzip bytes.
var inp = fs.createReadStream('test.gz')
// Counts successful gunzip callbacks (one per chunk, not one per text line).
var n = 0
/**
 * Callback passed to zlib.gunzip() for each chunk.
 * @param err  error reported by zlib.gunzip(), if any
 * @param data inflated output for the chunk (converted with toString() below)
 */
var lineProcessing = function (err, data) {
// Failures are dropped silently: nothing is logged or rethrown when err is set.
if (!err) {
n += 1
console.log ("line: " + n)
// Prints the entire inflated buffer for this chunk, which may span many lines.
console.log (data.toString())
}
}
inp
.on('data', function (chunk) {
// NOTE(review): zlib.gunzip() is the one-shot API and presumably expects a
// complete gzip buffer; chunk boundaries here are set by the read stream, not
// by newlines, so only a file that fits in a single chunk is likely to inflate.
zlib.gunzip (chunk, lineProcessing)
})
.on('end', function () {
// Fires when the file has been fully read; gunzip callbacks are asynchronous,
// so this may be logged before the last "line" output — TODO confirm.
console.log ('ende');
});
我想我需要为 zlib.createGunzip 设置一个 chunk size,使每次只读取到下一个 \n 为止。但是这个大小该如何动态确定呢?
答案 0 :(得分:13)
使用readline
可能更容易:
const fs = require('fs');
const zlib = require('zlib');
const readline = require('readline');
// The file stream and the gunzip transform are kept in variables so that error
// handlers can be attached to both: pipe() does not forward 'error' events, and
// an 'error' event without a listener crashes the process.
const fileStream = fs.createReadStream('test.gz');
const gunzipStream = zlib.createGunzip();

// crlfDelay: Infinity makes readline treat every \r\n as a single line break,
// even when the \r and the \n arrive in different chunks.
const lineReader = readline.createInterface({
  input: fileStream.pipe(gunzipStream),
  crlfDelay: Infinity,
});

/**
 * Reports a read or decompression failure and shuts the pipeline down.
 * @param {Error} err - error emitted by the file stream or the gunzip stream
 */
const handleStreamError = (err) => {
  console.error(`Failed to read test.gz: ${err.message}`);
  // Keep the failure visible to the caller without killing the process abruptly.
  process.exitCode = 1;
  lineReader.close();
  fileStream.destroy();
  gunzipStream.destroy();
};
fileStream.on('error', handleStreamError);
gunzipStream.on('error', handleStreamError);

// Number of lines emitted so far (1-based when printed).
let n = 0;
lineReader.on('line', (line) => {
  n += 1;
  console.log(`line: ${n}`);
  console.log(line);
});
答案 1 :(得分:3)
如果几年后仍然有人在研究如何实现这一点,并且想要一种可以配合 async/await 使用的解决方案,下面是我的做法(使用 TypeScript 编写,但您可以直接去掉类型注解)。
import fs from "fs";
import zlib from "zlib";
import readline from "readline";
/**
 * Builds a readline interface over the gunzipped content of a file.
 *
 * @param path - path of the gzip-compressed file to open
 * @returns a readline interface whose input is the decompressed stream
 */
const line$ = (path: string) => {
  const compressed = fs.createReadStream(path);
  const decompressed = compressed.pipe(zlib.createGunzip());
  return readline.createInterface({ input: decompressed, crlfDelay: Infinity });
};
/**
 * Example consumer: iterates asynchronously over every line produced by
 * line$ for the given gzip file.
 */
const yourFunction = async () => {
  const lines = line$("/path/to/file.txt.gz");
  for await (const line of lines) {
    // do stuff with line
  }
};
答案 2 :(得分:0)
在TypeScript中逐行读取纯文本或gzip文件:
import * as fs from 'fs';
import * as zlib from 'zlib'
import * as readline from 'readline'
/**
 * Opens a file for line-by-line reading, gunzipping it first when the path
 * ends in ".gz" (case-insensitive).
 *
 * @param path - path of the plain-text or gzip-compressed file
 * @returns a readline interface over the (possibly decompressed) content
 */
function readFile(path: string) {
  const raw = fs.createReadStream(path)
  const isGzip = /\.gz$/i.test(path)
  // Only insert the gunzip transform for gzip files; plain files are read as-is.
  const input: NodeJS.ReadableStream = isGzip ? raw.pipe(zlib.createGunzip()) : raw
  return readline.createInterface({ input, crlfDelay: Infinity })
}
/**
 * Prints every line of the sample gzip-compressed man page to stdout.
 */
async function main() {
  for await (const line of readFile('/usr/share/man/man1/less.1.gz')) {
    console.log(line)
  }
}

// Entry point: log any rejection from main() and exit with status 1.
main().catch((error) => {
  console.error(error)
  process.exit(1)
})