我想在已知文件路径和行号的情况下提取文件中的某一行,并且最好不要读取超出必要范围的文件内容。
出于我在这里使用的目的,无论是异步还是同步都无关紧要。
我当前(糟糕)的实现如下所示:
/**
 * Synchronously read one line of a text file and hand it to a callback.
 *
 * The whole file is read and split on "\n"; the selected line is passed
 * to the callback with surrounding whitespace trimmed.
 *
 * @param {string} filename - Path of the file to read.
 * @param {number|string} line_no - 1-based line number; parsed as a decimal integer.
 * @param {function(null, string)} callback - Invoked as callback(null, line).
 * @throws {Error} When line_no does not identify a line of the file
 *   (also for non-numeric or non-positive values), or when the file cannot be read.
 */
function get_line(filename, line_no, callback) {
  // Explicit radix: the value is always interpreted as decimal.
  var index = parseInt(line_no, 10) - 1;
  var lines = fs.readFileSync(filename, 'utf8').split("\n");

  // Index directly instead of scanning with for...in, which walks every
  // enumerable key of the array. The negated form also rejects NaN.
  if (!(index >= 0 && index < lines.length)) {
    throw new Error('File end reached without finding line');
  }
  callback(null, lines[index].trim());
}
我尝试过用 createReadStream 来实现,但 data 事件似乎始终不会触发。有没有人能给出这个问题的直接解决方案,或者推荐一些比标准库 API 文档更注重示例的 Node.js 文件系统交互文档?
答案 0 :(得分:20)
可读流
var fs = require('fs');
/**
 * Read one line of a text file through a read stream, stopping as soon as
 * the requested line has been seen in full.
 *
 * Lines are delimited by "\n". The text after the last newline counts as a
 * line as well, matching what String.prototype.split("\n") would produce.
 *
 * @param {string} filename - Path of the file to read.
 * @param {number|string} line_no - 1-based line number; parsed as a decimal integer.
 * @param {function(*, ?string)} callback - Called exactly once:
 *   callback(null, line) on success,
 *   callback('File end reached without finding line', null) when the line
 *   does not exist, or callback(error, null) with the stream's error.
 */
function get_line(filename, line_no, callback) {
  var wanted = parseInt(line_no, 10);

  // A line number below 1 (or NaN) can never match; report it asynchronously
  // so the callback timing is the same as for every other outcome.
  if (!(wanted >= 1)) {
    process.nextTick(function () {
      callback('File end reached without finding line', null);
    });
    return;
  }

  // `mode` and `fd` are left at their defaults: the file is only read, and
  // a legacy octal literal (0666) is a syntax error in strict/module code.
  // `highWaterMark` is the current name of the chunk-size option.
  var stream = fs.createReadStream(filename, {
    flags: 'r',
    encoding: 'utf8',
    highWaterMark: 64 * 1024
  });

  var pending = '';   // text of the line that is still being assembled
  var completed = 0;  // number of newline-terminated lines already consumed
  var done = false;   // guards against invoking the callback twice

  function finish(err, line) {
    if (done) {
      return;
    }
    done = true;
    callback(err, line);
  }

  stream.on('data', function (chunk) {
    if (done) {
      return;
    }
    var pieces = (pending + chunk).split("\n");
    // The last piece has no terminating newline yet, so it may be cut off
    // by the chunk boundary; hold it back until more data (or EOF) arrives.
    pending = pieces.pop();

    if (completed + pieces.length >= wanted) {
      stream.destroy();
      finish(null, pieces[wanted - completed - 1]);
      return;
    }
    // Only the count of earlier lines is needed, not their text.
    completed += pieces.length;
  });

  stream.on('error', function (err) {
    // Pass the real error on instead of hiding it behind a fixed string.
    finish(err || 'Error', null);
  });

  stream.on('end', function () {
    // At EOF the held-back text is the final line of the file.
    if (completed + 1 === wanted) {
      finish(null, pending);
      return;
    }
    finish('File end reached without finding line', null);
  });
}
// Example call: ask for line_no 1 of ./file.txt and print whatever comes back.
// The error argument is accepted but not inspected.
get_line('./file.txt', 1, function (error, text) {
  var message = 'The line: ' + text;
  console.log(message);
});
直接解决方案:
您应该使用切片方法而不是循环。
var fs = require('fs');
/**
 * Synchronously read one line of a text file and hand it to a callback.
 *
 * The whole file is read and split on "\n"; the selected line is passed
 * to the callback unmodified.
 *
 * @param {string} filename - Path of the file to read.
 * @param {number|string} line_no - 1-based line number; parsed as a decimal integer.
 * @param {function(null, string)} callback - Invoked as callback(null, line).
 * @throws {Error} When line_no does not identify a line of the file
 *   (also for non-numeric or non-positive values), or when the file cannot be read.
 */
function get_line(filename, line_no, callback) {
  var wanted = parseInt(line_no, 10);
  var lines = fs.readFileSync(filename, 'utf8').split("\n");

  // Valid line numbers are 1..lines.length; the negated form also rejects NaN.
  if (!(wanted >= 1 && wanted <= lines.length)) {
    throw new Error('File end reached without finding line');
  }
  // Line numbers are 1-based while array indices are 0-based.
  callback(null, lines[wanted - 1]);
}
// Example call: ask for line_no 9 of ./file.txt and print whatever comes back.
// The error argument is accepted but not inspected.
get_line('./file.txt', 9, function (error, text) {
  var message = 'The line: ' + text;
  console.log(message);
});
for (var l in lines) 不是遍历数组的最有效方式,你应该这样写:
// Index-based loop over `lines`: the length is read once into iMax instead of
// on every iteration, and each element is reached as lines[i].
for(var i = 0, iMax = lines.length; i < iMax; i++){/* lines[i] */ }
异步方式:
var fs = require('fs');
/**
 * Asynchronously read a file and pass one of its lines to a callback.
 *
 * The whole file is read as UTF-8 and split on "\n".
 *
 * @param {string} filename - Path of the file to read.
 * @param {number|string} line_no - 1-based line number; parsed as a decimal integer.
 * @param {function(*, ?string)} callback - Called exactly once:
 *   callback(null, line) on success,
 *   callback('File end reached without finding line', null) when the line
 *   does not exist, or callback(error, null) when the file cannot be read.
 */
function get_line(filename, line_no, callback) {
  var wanted = parseInt(line_no, 10);

  fs.readFile(filename, 'utf8', function (err, data) {
    // Throwing here would surface as an uncaught exception, because the
    // caller's stack is long gone; report the failure through the callback.
    if (err) {
      return callback(err, null);
    }

    // Possible improvement: scan for newlines and stop at the wanted line
    // instead of splitting the entire content.
    var lines = data.split("\n");

    // Valid line numbers are 1..lines.length; the negated form also rejects NaN.
    if (!(wanted >= 1 && wanted <= lines.length)) {
      return callback('File end reached without finding line', null);
    }
    // Line numbers are 1-based while array indices are 0-based.
    callback(null, lines[wanted - 1]);
  });
}
// Example call: ask for line_no 9 of ./file.txt and print whatever comes back.
// The error argument is accepted but not inspected.
get_line('./file.txt', 9, function (error, text) {
  var message = 'The line: ' + text;
  console.log(message);
});
答案 1 :(得分:6)
不读取超出必要范围的文件内容
编辑:模块没有维护,我建议使用其他模块逐行读取,例如,使用转换流:http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
// Countdown of "line" events still to be seen; the event that brings it to
// zero carries the line that gets stored.
var n = 10;
// The stored line; remains null if the countdown never reaches zero.
var l = null;

// Per the original note, the reader buffers internally (16KB by default);
// a smaller buffer such as 4KB can be requested with:
// new BufferedReader ("file", { encoding: "utf8", bufferSize: 4*1024 })
new BufferedReader ("file", { encoding: "utf8" })
  .on ("error", function (error){
    // Just report the error.
    console.log (error);
  })
  .on ("line", function (line){
    n -= 1;
    if (n !== 0){
      return;
    }
    l = line;
    // interrupt() stops the reading, so later lines are not processed.
    this.interrupt ();
  })
  .on ("end", function (){
    // Print the captured line.
    console.log (l);
  })
  .read ();
答案 2 :(得分:4)
通过删除“fileData”变量中的先前数据,您可以大大提高FGRibreau答案的性能。
/**
 * Read one line of a text file through a read stream while keeping only the
 * unfinished tail of the data in memory.
 *
 * Lines are delimited by '\n'. The text after the last newline counts as a
 * line as well, matching what String.prototype.split('\n') would produce.
 * The function is given a name so that the snippet is a valid declaration.
 *
 * @param {string} file - Path of the file to read.
 * @param {number|string} line_no - 1-based line number; parsed as a decimal integer.
 * @param {function(*, ?string)} cb - Called exactly once:
 *   cb(null, line) on success,
 *   cb('File end reached without finding line', null) when the line does
 *   not exist, or cb(error, null) with the stream's error.
 */
function get_line(file, line_no, cb){
  var wanted = parseInt(line_no, 10);

  // A line number below 1 (or NaN) can never match; report it asynchronously
  // so the callback timing is the same as for every other outcome.
  if (!(wanted >= 1)) {
    process.nextTick(function () {
      cb('File end reached without finding line', null);
    });
    return;
  }

  // `mode` and `fd` are left at their defaults since the file is only read;
  // `highWaterMark` is the current name of the chunk-size option.
  var stream = fs.createReadStream(file, {
    flags: 'r',
    encoding: 'utf8',
    highWaterMark: 64 * 1024
  });

  var tail = '';      // unfinished line carried over to the next chunk
  var consumed = 0;   // number of newline-terminated lines already dropped
  var settled = false; // guards against invoking cb twice

  function settle(err, line) {
    if (settled) {
      return;
    }
    settled = true;
    cb(err, line);
  }

  stream.on('data', function (chunk) {
    if (settled) {
      return;
    }
    var pieces = (tail + chunk).split('\n');
    // Keep the partial last piece: discarding it would lose the start of a
    // line that straddles two chunks.
    tail = pieces.pop();

    if (consumed + pieces.length >= wanted) {
      stream.destroy();
      settle(null, pieces[wanted - consumed - 1]);
      return;
    }
    // Earlier lines are only counted, never stored, so memory stays bounded
    // by one chunk plus one line.
    consumed += pieces.length;
  });

  stream.on('error', function (err) {
    // Pass the real error on instead of hiding it behind a fixed string.
    settle(err || 'Error', null);
  });

  stream.on('end', function () {
    // At EOF the carried-over text is the final line of the file.
    if (consumed + 1 === wanted) {
      settle(null, tail);
      return;
    }
    settle('File end reached without finding line', null);
  });
}
使用70000行文件,要显示行号50000,我得到了这些结果:
real 0m3.504s
user 0m0.000s
sys 0m0.015s
对于使用else的相同示例,我得到以下内容:
real 0m0.540s
user 0m0.015s
sys 0m0.031s
这也意味着内存消耗要低得多。