I am trying to understand how to use fibers with fast-csv to build a line-by-line reader (a single-user command-line script) that pauses reading/processing of each line until that line has finished its various async calls. (I don't want to roll my own CSV code; I want to use something that has already figured out the gotchas of the CSV format.)
If I do this:
var csv = require("fast-csv");
var CSV_STRING = 'a,b\n' +
    'a1,b1\n' +
    'a2,b2\n';
csv
    .fromString(CSV_STRING, {headers: false})
    .on("record", function (data) {
        console.log("line="+JSON.stringify(data));
        setTimeout(function(){
            console.log("timeout");
        },2000);
    })
    .on("end", function () {
        console.log("done parsing CSV records");
    });
console.log("done initializing csv parse");
I get what I expect:
done initializing csv parse
line=["a","b"]
line=["a1","b1"]
line=["a2","b2"]
done parsing CSV records
timeout
timeout
timeout
If I try to use a fiber to yield after each record:
Fiber(
    function () {
        var fiber = Fiber.current;
        csv
            .fromString(CSV_STRING, {headers: false})
            .on("record", function (data) {
                console.log("line="+JSON.stringify(data));
                setTimeout(function(){
                    console.log("timeout");
                    fiber.run();
                },2000);
                Fiber.yield();
            })
            .on("end", function () {
                console.log("done parsing CSV records");
            });
        console.log("done initializing csv parse");
    }).run();
I get:
done initializing csv parse
line=["a","b"]
events.js:141
throw er; // Unhandled 'error' event
^
Error: yield() called with no fiber running
I think I understand what is happening: the code inside Fiber(...).run() finishes, so execution leaves the fiber before yield is ever reached, and by the time yield is called there is no fiber any more (hence the apt error message "yield() called with no fiber running").
What is the proper way to keep the fiber running until I am done parsing?
It seems like such a simple question, but I am not seeing the obvious answer. At first I thought of putting a yield right before execution leaves Fiber(...).run(), but that does not work, because the first fiber.run() would just make it leave the fiber again.
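For reference, here is a minimal sketch of the yield/run handshake in isolation (my own illustration; it assumes the fibers package is installed). Fiber.yield() is only legal while the fiber body itself is on the call stack; a callback that fires later runs outside the fiber and can only resume it with fiber.run():

var Fiber = require('fibers');

Fiber(function () {
    var fiber = Fiber.current;
    setTimeout(function () {
        // This callback runs outside the fiber, so it must not call
        // Fiber.yield(); it can only resume the suspended fiber.
        fiber.run("async result");
    }, 1000);
    var value = Fiber.yield(); // legal: we are still inside the fiber body
    console.log("fiber resumed with: " + value);
}).run();
console.log("fiber is suspended, main script keeps going");

In the fast-csv case above, the "record" handler is invoked after Fiber(...).run() has already returned, so the Fiber.yield() inside it runs with no fiber on the stack, which is exactly the error.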
What I want is a flow like this:
done initializing csv parse
line=["a","b"]
timeout
line=["a1","b1"]
timeout
line=["a2","b2"]
timeout
done parsing CSV records
But that is probably not possible without rewriting fast-csv's internals, because fast-csv controls when the event for each record fires. My current thinking is that each event is fired from inside fast-csv, and the moment the user's handler in csv.on("record") returns, control goes straight back to the loop inside fast-csv that is parsing the CSV.
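To make that concrete, here is a generic EventEmitter sketch (illustration only, not fast-csv's actual internals): the producer that emits the "record" events from its own loop owns the pacing, and nothing the listener does synchronously can make that loop wait:

var EventEmitter = require('events').EventEmitter;

var producer = new EventEmitter();
producer.on("record", function (row) {
    console.log("handler saw " + JSON.stringify(row));
    setTimeout(function () {
        console.log("async work finished for " + JSON.stringify(row));
    }, 2000);
    // Returning here hands control straight back to the emitting loop below.
});

[["a", "b"], ["a1", "b1"], ["a2", "b2"]].forEach(function (row) {
    producer.emit("record", row); // the producer decides when each event fires
});
console.log("producer loop finished before any async work ran");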
Answer 0 (score: 0)
Node: v5.4.0
Well, here is one way to get that behavior. I use ES6 generators to read the raw file line by line, and then a generator around the fast-csv library to parse each raw line, which results in a non-async execution flow and output like an old single-user command-line script.
'use strict';
var csv = require("fast-csv");
var sfs = require('./sfs');

function parse(line) {
    csv
        .fromString(line, {headers: false})
        .on("record", function (data) {
            it.next(data);
        });
}

function *main() {
    // Make sure to initialize with a max buffer big enough to span any possible line length. Otherwise undefined
    var fs = new sfs(it, 4096);
    var result=yield fs.open("data.csv");
    var line;
    while((line=yield fs.readLine()) != null) {
        console.log("line="+line);
        var csvData=yield parse(line);
        console.log("value1="+csvData[0]+" value2="+csvData[1]);
    }
    console.log("DONE");
}

var it = main();
it.next(); // get it all started
Plus a quirky (quick and hacky) class to wrap the fs bits I needed. I am sure there are better ways to do what I did, but it fits my needs.
sfs.js
'use strict';
var fs=require('fs')

class sfs {
    constructor(it, maxbufsize) {
        this.MAX_BUF=maxbufsize;
        this.it=it;
        this.fd=null;
        this.lineBuf="";
        this.buffer=new Buffer(this.MAX_BUF);
        this.buflen=0;
    }
    open(file) {
        var parent=this;
        fs.open(file,'r',function(err,fd){
            parent.fd=fd;
            var parent2=parent;
            fs.fstat(fd,function(err, stats){
                parent2.stats=stats;
                parent2.it.next(stats);
            })
        })
    }
    readLine(){
        var parent = this;
        var i=0
        var s=this.stats.size
        var line="";
        var index=this.MAX_BUF-this.buflen;
        // read data into buffer, buffer may already have data from previous read that was shifted left over extracted line
        fs.read(this.fd,this.buffer,this.MAX_BUF-index,index,null,function(err,len,buf){
            var expectedReadLen=parent.MAX_BUF-parent.buflen;
            if(len < expectedReadLen) { // If we didn't read enough to backfill buffer, lets make sure the string is terminated
                                        // as it shifts left so we don't try interpret older records to the right
                parent.buffer.fill(' ',parent.buflen+len,parent.MAX_BUF);
            }
            parent.buflen+=len; // whatever was in buffer has more now
            index=parent.buffer.indexOf('\n');
            if(index > -1) {
                line=parent.buffer.toString('utf8',0,index);
                buf.copy(parent.buffer,0,index+1,parent.buflen); // shift unused data left
                parent.buflen-=(index+1); // buffer left over after removing /n terminated line
                if(len<expectedReadLen) { // If we didn't read enough to backfill buffer, lets make sure we erase old data
                    parent.buffer.fill(' ',parent.buflen,parent.MAX_BUF);
                }
            } else {
                if(parent.buflen > 0) {
                    line=parent.buffer.toString('utf8',0,parent.buflen);
                    parent.buflen=0;
                } else {
                    line=null;
                }
            }
            parent.it.next(line);
        });
    }
    close() {
        fs.close(this.fd);
    }
}
module.exports=sfs;
Answer 1 (score: 0)
Streams are pausable/resumable:
var csv = require("fast-csv");
var CSV_STRING = 'a,b\n' +
    'a1,b1\n' +
    'a2,b2\n';

var stream = csv.fromString(CSV_STRING, { headers: false })
    .on("data", function (data) {
        // pause the stream
        stream.pause();
        console.log("line: " + JSON.stringify(data));
        setTimeout(function () {
            // all async stuff are done, resume the stream
            stream.resume();
            console.log("timeout");
        }, 2000);
    }).on("end", function () {
        console.log("done parsing CSV records");
    });
The console output is almost what you want; the only difference is that the final "timeout" lands after "done parsing CSV records", since the last record's setTimeout is still pending when the parser reports "end":
/*
line: ["a","b"]
timeout
line: ["a1","b1"]
timeout
line: ["a2","b2"]
done parsing CSV records
timeout
*/
May I ask why you absolutely need to read your csv synchronously?