我可能总体上缺乏对流的深入了解。但是,我想知道我需要的东西应该如何有效地工作。
我想要实现一个csv文件的读取方法,然后对数据库(或api)进行查询并附加数据。之后,将带有附加数据的行写入新的csv文件。我为此使用fast-csv节点库。
这是我的实现方式:
const fs = require("fs");
const csv = require("fast-csv");
const delay = t => new Promise(resolve => setTimeout(resolve, t));
// Simulate a per-row database/API lookup, then hand the enriched row to the
// shared CSV formatting stream.
const asyncFunction = async (row, csvStream) => {
  // Pretend to hit the database.
  await delay(1200);
  // Attach the fetched value and queue the row for writing.
  csvStream.write(Object.assign(row, { data: "data" }));
};
// Pending enrichment promises, one per CSV row.
const array = [];
// Destination file for the enriched rows.
const writeStream = fs.createWriteStream("output.csv");
// Formatting stream that serializes row objects back to CSV.
const csvStream = csv.format({ headers: true });
csvStream.pipe(writeStream).on("finish", () => {
  console.log("End of writing");
});
// Stream the input CSV, launch one async enrichment task per row (unbounded
// concurrency), and close the output stream only after every task settles.
fs.createReadStream("input.csv")
  // Surface file-read failures instead of crashing on an unhandled 'error'.
  .on("error", err => console.error("read error", err))
  .pipe(csv.parse({ headers: true }))
  // Surface CSV parsing failures.
  .on("error", err => console.error("parse error", err))
  // FIX: the callback was `async` but its promise was ignored; a plain
  // function makes clear that we only collect the task and move on.
  .transform(function(row, next) {
    array.push(asyncFunction(row, csvStream));
    next();
  })
  .on("finish", function() {
    console.log("finished reading file");
    // Wait for all database requests and writes to finish before ending
    // the write stream.
    Promise.all(array)
      .then(() => {
        csvStream.end();
        console.log("finished writing file");
      })
      // FIX: a rejected task previously became an unhandled rejection and
      // the output stream was never closed; report and still close it.
      .catch(err => {
        console.error("row processing error", err);
        csvStream.end();
      });
  });
我特别想知道有什么方法可以优化我在这里所做的事情，因为我觉得我缺少关于如何将该库用于此类场景的重要信息。
此致，Rokas
答案 0 :(得分:0)
我在 fast-csv 的 GitHub issues 中找到了解决方案。维护者 Doug Martin 在下面的要点（gist）中演示了如何借助 Transform 流高效地完成这类操作：
const path = require('path');
const fs = require('fs');
const { Transform } = require('stream');
const csv = require('fast-csv');
/**
 * Transform stream that buffers incoming records and persists them in
 * batches, re-emitting each record downstream once its batch is saved.
 */
class PersistStream extends Transform {
  /**
   * @param {object} [args] - stream options forwarded to Transform;
   *   `batchSize` (default 100) sets how many records trigger a save.
   */
  constructor(args) {
    super({ objectMode: true, ...(args || {}) });
    this.batchSize = 100;
    this.batch = [];
    if (args && args.batchSize) {
      this.batchSize = args.batchSize;
    }
  }

  _transform(record, encoding, callback) {
    this.batch.push(record);
    if (this.shouldSaveBatch) {
      // Batch is full: persist it, then signal readiness for more data.
      this.processRecords()
        .then(() => callback())
        // BUG FIX: original was `.catch(err => err(err))`, which invoked the
        // Error object as a function (TypeError) and swallowed the failure.
        // Propagate it to the stream machinery instead.
        .catch(err => callback(err));
      return;
    }
    // Batch not full yet: just accept the record.
    callback();
  }

  _flush(callback) {
    if (this.batch.length) {
      // Persist any leftover records smaller than a full batch.
      this.processRecords()
        .then(() => callback())
        // BUG FIX: same `err(err)` defect as _transform; emit the error.
        .catch(err => callback(err));
      return;
    }
    // No records left to persist.
    callback();
  }

  pushRecords(records) {
    // Emit each saved record for downstream consumers.
    records.forEach(r => this.push(r));
  }

  get shouldSaveBatch() {
    // This could be any check; here a batch is "full" at batchSize records.
    return this.batch.length >= this.batchSize;
  }

  async processRecords() {
    // Save the current batch, then re-emit the records downstream.
    const records = await this.saveBatch();
    this.pushRecords(records);
    return records;
  }

  async saveBatch() {
    // Swap the buffer out first so new records can accumulate meanwhile.
    const records = this.batch;
    this.batch = [];
    console.log(`Saving batch [noOfRecords=${records.length}]`);
    // This is where you would save/update/delete; simulates async I/O.
    return new Promise(res => {
      setTimeout(() => res(records), 100);
    });
  }
}
/**
 * Parse `file` through a PersistStream and resolve with the number of
 * records that flowed through; reject on any read, parse, or persist error.
 */
const processCsv = ({ file, batchSize }) =>
  new Promise((resolve, reject) => {
    let recordCount = 0;
    // Wire up the persisting stage first so no events are missed.
    const persist = new PersistStream({ batchSize });
    persist
      .on('error', reject)
      .on('data', () => { recordCount += 1; })
      .on('end', () => resolve({ event: 'end', recordCount }));
    fs.createReadStream(file)
      // Surface file-read errors.
      .on('error', reject)
      .pipe(csv.parse({ headers: true }))
      // Surface parsing errors.
      .on('error', reject)
      .pipe(persist);
  });
const file = path.resolve(__dirname, `batch_write.csv`);
// Run the demo: process the sample CSV in batches of 5 records.
(async () => {
  try {
    const { event, recordCount } = await processCsv({ file, batchSize: 5 });
    console.log(`Done Processing [event=${event}] [recordCount=${recordCount}]`);
  } catch (e) {
    console.error(e.stack);
  }
})();
https://gist.github.com/doug-martin/b434a04f164c81da82165f4adcb144ec