库建议:NodeJs读取csv文件

时间:2014-04-15 09:58:48

标签: node.js csv

我想用 Node.js 解析一个包含 10000 条记录的 .csv 文件，并对每一行执行一些操作。我尝试过使用 http://www.adaltas.com/projects/node-csv ，但无法让它在每一行处暂停，它只会一次性读完全部 10000 条记录。我需要做到以下几点：

  1. 逐行阅读csv
  2. 在每一行执行耗时的操作
  3. 转到下一行
  4. 有人可以在这里建议任何其他想法吗?

17 个答案:

答案 0 :(得分:66)

看起来你需要使用基于流的解决方案。已经有这样的库存在，所以在重新发明轮子之前，不妨先试试这个库，它还支持验证。 https://www.npmjs.org/package/fast-csv

答案 1 :(得分:47)

我当前的解决方案使用异步模块串行执行:

const fs = require('fs');
const parse = require('csv-parse');
const async = require('async');

const inputFile = 'myfile.csv';

// The callback form of csv-parse hands over all parsed records at once in
// `data`; async.eachSeries then walks them strictly one after another.
const parser = parse({delimiter: ','}, function (err, data) {
  if (err) {
    // Parsing failed, so there are no records to iterate over.
    console.error(err);
    return;
  }
  async.eachSeries(data, function (line, callback) {
    // do something with the line
    doSomething(line).then(function () {
      // when processing finishes invoke the callback to move to the next one
      callback();
    }, callback); // a rejection is forwarded so the series stops instead of hanging
  }, function (seriesErr) {
    // Called once after the last line, or immediately after the first failure.
    if (seriesErr) {
      console.error(seriesErr);
    }
  });
});
fs.createReadStream(inputFile).pipe(parser);

答案 2 :(得分:41)

我用这种方式: -

var fs = require('fs');
var parse = require('csv-parse');

// Accumulates every parsed row so the whole file is available on 'end'.
var csvData = [];

// Called once per parsed row.
function onRow(csvrow) {
    console.log(csvrow);
    //do something with csvrow
    csvData.push(csvrow);
}

// Called after the last row has been emitted.
function onEnd() {
    //do something with csvData
    console.log(csvData);
}

var rowStream = fs.createReadStream(req.file.path).pipe(parse({delimiter: ':'}));
rowStream.on('data', onRow);
rowStream.on('end', onEnd);

答案 3 :(得分:10)

您提到的 node-csv 项目完全足以胜任逐行转换大量 CSV 数据的任务，下面的示例来自其文档：http://csv.adaltas.com/transform/

// Chained node-csv pipeline: read two records from an inline string, send the
// output to console.log, and run every row through an asynchronous transform.
// NOTE(review): `csv` is not required in this snippet — presumably the node-csv
// module discussed above; confirm.
csv()
  .from('82,Preisner,Zbigniew\n94,Gainsbourg,Serge')
  .to(console.log)
  // The transform receives the row, its index and a completion callback.
  .transform(function(row, index, callback){
    // Defer the work to the next tick to show that the transform may finish asynchronously.
    process.nextTick(function(){
      // First argument is the error slot (null here), second is the reversed row.
      callback(null, row.reverse());
    });
});

根据我的经验,我可以说它也是一个相当快速的实现,我一直在处理具有接近10k记录的数据集,并且整个集合的处理时间在合理的几十毫秒级别

关于 jurka 提出的基于流的解决方案建议：node-csv 本身就是基于流的，并遵循 Node.js 的流 API。

答案 4 :(得分:8)

要暂停fast-csv中的流式传输,您可以执行以下操作:

// Handle fast-csv rows one at a time: the stream is paused as soon as a row
// arrives and resumed only after that row's work has finished.
const csvstream = csv.fromPath(filePath, { headers: true });

csvstream.on("data", (row) => {
    csvstream.pause();
    // do some heavy work
    // when done resume the stream
    csvstream.resume();
});

csvstream.on("end", () => {
    console.log("We are done!")
});

csvstream.on("error", (error) => {
    console.log(error)
});

答案 5 :(得分:6)

  • 此解决方案使用 csv-parser，而不是上面某些答案中使用的 csv-parse。
  • csv-parser 比 csv-parse 晚出现大约两年。
  • 两者的用途相同，但我个人觉得 csv-parser 更好，因为用它处理标头非常方便。

首先安装csv-parser:

npm install csv-parser

因此,假设您有一个这样的csv文件:

NAME, AGE
Lionel Messi, 31
Andres Iniesta, 34

您可以通过以下方式执行所需的操作

const fs = require('fs');
const csv = require('csv-parser');

// Stream errors are not forwarded through pipe(), so the same handler is
// attached to both the file stream and the parser.
const onStreamError = (err) => {
    console.error('Failed to read CSV:', err);
};

fs.createReadStream(inputFilePath)
.on('error', onStreamError)
// The sample header row is "NAME, AGE": without trimming, the second key would
// be " AGE" (leading space) and data.AGE would be undefined.
.pipe(csv({
    mapHeaders: ({ header }) => header.trim(),
    mapValues: ({ value }) => value.trim(),
}))
.on('error', onStreamError)
.on('data', (data) => {
    try {
        console.log(`Name is: ${data.NAME}`);
        console.log(`Age is: ${data.AGE}`);

        //perform the operation
    }
    catch (err) {
        // Report the failing row instead of silently swallowing the error.
        console.error('Failed to process row:', err);
    }
})
.on('end', () => {
    //some final operation
});

如需进一步了解，请参阅 csv-parser 的文档。

答案 6 :(得分:5)

Fast-CSV npm模块可以从csv文件逐行读取数据。

这里是一个例子:

    <script>
    // Chat-page client script: sends messages with HTTP POST requests and appends
    // received messages to the #messages element.
    // NOTE(review): relies on `io`, `$`/`jQuery`, `textarea` and `clear`, none of which are
    // defined in this snippet — presumably socket.io, jQuery and element-id globals; confirm.
    var socket = io()
    socket.connect('http://192.168.34.54:4747');
    // Every "chat" event from the socket is appended to the message box.
    socket.on("chat", addChat)
    // Callback passed to $(): load the existing chats and wire up the input box.
    $(() => {
        getChats()
        textarea.addEventListener('keydown', function(event){
                    // Key code 13 is Enter; only act when Shift is not held.
                    if(event.which === 13 && event.shiftKey == false){
                        // Emit to server input
                        if( $('#username').val() != '')
                        {
                            var chatMessage = {
                                name: $("#username").val(), chat: $("#textarea").val()
                            }
                            postChat(chatMessage)
                            // Clear the input box after sending.
                            var obj = document.getElementById('textarea');
                            obj.value = '';
                        }
                        else{
                            alert('Plear enter name of the sender!!!!');
                        }
                    }
                })
    })

    // POST one chat object ({ name, chat }) to the putChats endpoint.
    function postChat(chat) {
        $.post("http://192.168.34.54:4747/chatroom/putChats", chat)
    }

    // GET the chats from the getChats endpoint and append each one.
    function getChats() {
        $.get("http://192.168.34.54:4747/chatroom/getChats", (chats) => {
            chats.forEach(addChat)
        })
    }

    // Append "name: chat" on a new line of the #messages element's value.
    function addChat(chatObj) {
        var message = document.getElementById('messages');
        message.value +=  "\n" + chatObj.name+": "+chatObj.chat;
    }

    // On a "cleared" event, empty the message box.
    socket.on('cleared', function(){
        var message = document.getElementById('messages');
        message.value = '';
    });

    // Handle Chat Clear
    clear.addEventListener('click', function(){
        $.post("http://192.168.34.54:4747/chatroom/clear")
    });

    // Show the #snackbar element by setting its class to "show", then remove that class after 3 seconds.
    function myFunction() {
        // Get the snackbar DIV
        var x = document.getElementById("snackbar");

        // Add the "show" class to DIV
        x.className = "show";

        // After 3 seconds, remove the show class from DIV
        setTimeout(function(){ x.className = x.className.replace("show", ""); }, 3000);
    }
    // Call myFunction() once the document is ready.
    (function($) {
    $(document).ready(function() {
        myFunction();
    });
    })(jQuery);
</script>

答案 7 :(得分:3)

好的,所以这里有很多答案,我不认为它们回答了您的问题,我认为这与我的问题相似。

您需要执行类似联系数据库或第三方api的操作,这需要时间并且是异步的。由于大小原因或其他原因,您不想将整个文档加载到内存中,因此需要逐行阅读以进行处理。

我阅读了 fs 文档，读取流是可以暂停的，但使用 .on('data') 会让数据持续不断地流出；这里的大多数答案都用了这种方式，而这正是问题所在。


更新：关于流，我现在了解的比我原本想了解的还要多。

执行此操作的最佳方法是创建可写流。这会将csv数据通过管道传输到可写流中,从而可以管理异步调用。管道将一直管理缓冲区,直到返回到读取器为止,这样您就不会因占用大量内存而烦恼

简单版本

<!-- Comparison table (#insurance) with a label column plus "Basic" and "Comprehensive" columns.
     Each value cell is filled by a PHP the_field() call — presumably the ACF template function; confirm. -->
<table class="table table-striped why-choose-tbl" id="insurance">
   <thead>
      <tr>
         <th scope="col"></th>
         <th scope="col">Basic</th>
         <th scope="col">Comprehensive</th>
      </tr>
   </thead>
   <tbody>
      <tr>
        <td>Medical</td>
        <td><span><?php the_field('basic_medical') ?></span><i class="fas fa-check"></i></td>
        <td><span><?php the_field('comp_medical') ?></span><i class="fas fa-check"></i></td>
     </tr>
     <tr>
        <td>Cancellation</td>
        <td><span><?php the_field('basic_cancellation') ?></span><i class="fas fa-check"></i></td>
        <td><span><?php the_field('comp_cancellation') ?></span><i class="fas fa-check"></i></td>
     </tr>
   </tbody>
</table>

<!-- Hides every row of #insurance whose trimmed text content is empty. -->
<script>
$('#insurance tr').filter(function() {
    return $.trim($(this).text()) === '';
}).hide(); 

// $('#insurance > tbody  > tr').has('td:empty').hide()

// $('#insurance > tbody  > tr').each(function () {
//    if ($(this).find('td').is(':empty')) {
//        $(this).hide();
//    }
// });
</script>

类版本

<?php
     // get_field() returns the stored value; the_field() echoes it and returns
     // nothing, so the emptiness checks below only work with get_field().
     $basic = get_field('basic_medical');
     $comp = get_field('comp_medical');

   // When neither plan has a medical value, emit both cells hidden.
   if (empty($basic) && empty($comp))
        {
            // Quotes inside the double-quoted strings are escaped with a backslash, and the
            // values are concatenated in: a nested PHP tag inside a string is not evaluated.
            echo "<td style=\"display:none;\"><i class=\"fas fa-check\"></i><span>" . esc_html($basic) . " </span></td>";
            echo "<td style=\"display:none;\"><i class=\"fas fa-check\"></i><span>" . esc_html($comp) . "</span></td>";
        }
?>

旧方法:

readable 的问题

const fs = require('fs'); // previously missing: fs.createReadStream is used below
const parser = require('csv-parser');
const stripBom = require('strip-bom-stream');
const stream = require('stream');

// Writable sink for the parsed rows. The next row is not delivered to write()
// until done() has been called for the current one.
const mySimpleWritable = new stream.Writable({
  objectMode: true, // Because input is object from csv-parser
  write(chunk, encoding, done) { // Required
    // chunk is object with data from a line in the csv
    console.log('chunk', chunk);
    done();
  },
  final(done) { // Optional
    // last place to clean up when done
    done();
  }
});

// file -> strip BOM -> csv-parser -> writable sink
fs.createReadStream(fileNameFull)
  .pipe(stripBom())
  .pipe(parser())
  .pipe(mySimpleWritable);

您会注意到一个

const parser = require('csv-parser');
const stripBom = require('strip-bom-stream');
const stream = require('stream')

// Create writable class
class MyWritable extends stream.Writable {
  // Used to set object mode because we get an object piped in from csv-parser
  constructor(another_variable, options) {
    // Calls the stream.Writable() constructor.
    super({ ...options, objectMode: true });
    // additional information if you want
    this.another_variable = another_variable
  }
  // The write method
  // Called over and over, for each line in the csv
  async _write(chunk, encoding, done) {
    // The chunk will be a line of your csv as an object
    console.log('Chunk Data', this.another_variable, chunk)
    // demonstrate await call
    // This will pause the process until it is finished
    await new Promise(resolve => setTimeout(resolve, 2000));
    // Very important to add. Keeps the pipe buffers correct. Will load the next line of data
    done();
  };
  // Gets called when all lines have been read
  async _final(done) {
    // Can do more calls here with left over information in the class
    console.log('clean up')
    // lets pipe know its done and the .on('final') will be called
    done()
  }
}

// Instantiate the new writable class
myWritable = new MyWritable(somevariable)
// Pipe the read stream to csv-parser, then to your write class
// stripBom is due to Excel saving csv files with UTF8 - BOM format
fs.createReadStream(fileNameFull).pipe(stripBom()).pipe(parser()).pipe(myWritable)
  // optional
  .on('finish', () => {
    // will be called after the wriables internal _final
    console.log('Called very last')
  })

标志。我注意到，由于某种原因，在接近文件结尾时，无论文件大小，.on('read') 都会被第二次调用。我不确定原因，但这个标志可以阻止第二次处理去重复读取相同的行。

答案 8 :(得分:3)

我需要一个异步的 csv 读取器，最初尝试了 @Pransh Tiwari 的答案，但无法让它配合 await 和 util.promisify() 使用。最终我找到了 node-csvtojson，它的功能与 csv-parser 几乎相同，但支持 Promise。下面是 csvtojson 的实际使用示例:

const csvToJson = require('csvtojson');

/**
 * Loads ./recipients.csv with csvtojson (values trimmed) and prints the
 * name and email of every recipient.
 */
const processRecipients = async () => {
    const converter = csvToJson({ trim: true });
    const recipients = await converter.fromFile('./recipients.csv');

    // Reached only after the whole file has been parsed.
    for (const recipient of recipients) {
        console.log(recipient.name, recipient.email);
    }
};

答案 9 :(得分:2)

试试 line-by-line 插件。

npm install line-by-line --save

答案 10 :(得分:1)

这是我从外部网址获取csv文件的解决方案

const parse = require( 'csv-parse/lib/sync' );
const axios = require( 'axios' );

/**
 * Downloads a CSV file and parses it synchronously into an array of records.
 *
 * @param {string} path - URL of the CSV file.
 * @returns {Promise<Object[]|undefined>} One object per row keyed by the header
 *   row (`columns: true`), or undefined when the download or parse fails.
 */
const readCSV = ( module.exports.readCSV = async ( path ) => {
  try {
    // 'blob' is a browser-only response type; in Node the body is requested as text.
    const res = await axios( { url: path, method: 'GET', responseType: 'text' } );
    return parse( res.data, {
      columns: true,
      skip_empty_lines: true
    } );
  } catch ( e ) {
    // Still best-effort (resolves to undefined), but the actual error is now reported.
    console.error( 'err', e );
  }
} );
readCSV('https://urltofilecsv');

答案 11 :(得分:1)

我使用的是csv-parse,但是对于较大的文件却遇到了性能问题,我发现更好的库之一是Papa Parse,文档是很好的,良好的支持,轻量级的,没有依赖性的。

安装papaparse

npm install papaparse

用法:

  • 异步/等待
const fs = require('fs');
const Papa = require('papaparse');

const csvFilePath = 'data/test.csv';

/**
 * Reads the whole CSV file into a string and parses it with Papa Parse.
 * Resolves with the parsed rows so callers can simply `await` it.
 */
const readCSV = async (filePath) => {
  const csvData = fs.readFileSync(filePath).toString();

  return new Promise((resolve) => {
    // Invoked by Papa Parse once the entire input has been parsed.
    const onComplete = (results) => {
      console.log('Complete', results.data.length, 'records.');
      resolve(results.data);
    };

    Papa.parse(csvData, {
      header: true,
      transformHeader: (header) => header.trim(),
      complete: onComplete,
    });
  });
};

const test = async () => {
  const parsedData = await readCSV(csvFilePath);
};

test();
  • 回调
const fs = require('fs');
const Papa = require('papaparse');

const csvFilePath = 'data/test.csv';
const file = fs.createReadStream(csvFilePath);

// Rows are collected here as Papa Parse steps through the stream.
const csvData = [];

// Called for each parsed row.
const collectRow = (result) => {
  csvData.push(result.data);
};

// Called after the whole stream has been parsed.
const reportDone = () => {
  console.log('Complete', csvData.length, 'records.');
};

Papa.parse(file, {
  header: true,
  transformHeader: (header) => header.trim(),
  step: collectRow,
  complete: reportDone,
});

注意header: true是配置中的一个选项,有关其他选项,请参阅文档

答案 12 :(得分:0)

// Declared with const: the bare assignment created an implicit global and
// throws a ReferenceError in strict mode / ES modules.
const fs = require('fs');

// Read the whole file as UTF-8 text and log it.
fs.readFile('FILENAME WITH PATH', 'utf8', function (err, content) {
  if (err) {
    console.log('error occured ' + JSON.stringify(err));
    // Stop here: `content` is undefined when the read failed.
    return;
  }
  console.log('Fileconetent are ' + JSON.stringify(content));
});

答案 13 :(得分:0)

您可以使用csv-to-json模块将csv转换为json格式,然后可以在程序中轻松使用json文件

答案 14 :(得分:0)

使用 await / async 执行此任务的解决方法:

// Load the whole CSV file into an array with csvtojson.
const csv = require('csvtojson')
const csvFilePath = 'data.csv'
// `await` is only valid inside an async function (or at the top level of an ES module),
// so in a CommonJS script this line has to be placed inside an async function.
const array = await csv().fromFile(csvFilePath);

答案 15 :(得分:0)

我使用了一个简单的例子:https://www.npmjs.com/package/csv-parser

使用非常简单:

const csv = require('csv-parser');
const fs = require('fs');

// Every parsed row ends up here.
const results = [];

const rows = fs.createReadStream('./CSVs/Update 20191103C.csv').pipe(csv());

rows.on('data', (data) => {
  results.push(data);
});

// After the last row: dump everything, then one column of the first row.
rows.on('end', () => {
  console.log(results);
  const firstRow = results[0];
  console.log(firstRow['Lowest Selling Price']);
});

答案 16 :(得分:-1)

npm install csv

示例CSV文件 你需要一个CSV文件来解析,所以要么你已经有一个,或者你可以复制下面的文本并将其粘贴到一个新文件中并调用该文件“mycsv.csv”

ABC, 123, Fudge
532, CWE, ICECREAM
8023, POOP, DOGS
441, CHEESE, CARMEL
221, ABC, HOUSE
1
ABC, 123, Fudge
2
532, CWE, ICECREAM
3
8023, POOP, DOGS
4
441, CHEESE, CARMEL
5
221, ABC, HOUSE

示例代码读取和解析CSV文件

创建一个新文件,并在其中插入以下代码。务必仔细阅读幕后发生的事情。

    // Loads a CSV file into an in-memory array of MyCSV records and serves that
    // array as JSON over HTTP on port 8080.

    var csv = require('csv'); 
    // loads the csv module referenced above.

    var obj = csv(); 
    // gets the csv module to access the required functionality

    // Constructor for one CSV record; takes the first three columns of a row.
    function MyCSV(Fone, Ftwo, Fthree) {
        this.FieldOne = Fone;
        this.FieldTwo = Ftwo;
        this.FieldThree = Fthree;
    }; 
    // Define the MyCSV object with parameterized constructor, this will be used for storing the data read from the csv into an array of MyCSV. You will need to define each field as shown above.

    var MyData = []; 
    // MyData array will contain the data from the CSV file and it will be sent to the clients request over HTTP. 

    // NOTE(review): MyData is filled inside a callback, so a request served before the
    // callback has run would presumably get an empty array — confirm.
    obj.from.path('../THEPATHINYOURPROJECT/TOTHE/csv_FILE_YOU_WANT_TO_LOAD.csv').to.array(function (data) {
        // Each row is an array of column values; columns 0-2 become one MyCSV record.
        for (var index = 0; index < data.length; index++) {
            MyData.push(new MyCSV(data[index][0], data[index][1], data[index][2]));
        }
        console.log(MyData);
    });
    //Reads the CSV file from the path you specify, and the data is stored in the array we specified using callback function.  This function iterates through an array and each line from the CSV file will be pushed as a record to another array called MyData , and logs the data into the console to ensure it worked.

var http = require('http');
//Load the http module.

// Every request, whatever its URL or method, receives the current MyData as JSON.
var server = http.createServer(function (req, resp) {
    resp.writeHead(200, { 'content-type': 'application/json' });
    resp.end(JSON.stringify(MyData));
});
// Create a webserver with a request listener callback.  This will write the response header with the content type as json, and end the response by sending the MyData array in JSON format.

server.listen(8080);
// Tells the webserver to listen on port 8080(obviously this may be whatever port you want.)
1
var csv = require('csv'); 
2
// loads the csv module referenced above.
3
​
4
var obj = csv(); 
5
// gets the csv module to access the required functionality
6
​
7
function MyCSV(Fone, Ftwo, Fthree) {
8
    this.FieldOne = Fone;
9
    this.FieldTwo = Ftwo;
10
    this.FieldThree = Fthree;
11
}; 
12
// Define the MyCSV object with parameterized constructor, this will be used for storing the data read from the csv into an array of MyCSV. You will need to define each field as shown above.
13
​
14
var MyData = []; 
15
// MyData array will contain the data from the CSV file and it will be sent to the clients request over HTTP. 
16
​
17
obj.from.path('../THEPATHINYOURPROJECT/TOTHE/csv_FILE_YOU_WANT_TO_LOAD.csv').to.array(function (data) {
18
    for (var index = 0; index < data.length; index++) {
19
        MyData.push(new MyCSV(data[index][0], data[index][1], data[index][2]));
20
    }
21
    console.log(MyData);
22
});
23
//Reads the CSV file from the path you specify, and the data is stored in the array we specified using callback function.  This function iterates through an array and each line from the CSV file will be pushed as a record to another array called MyData , and logs the data into the console to ensure it worked.
24
​
25
var http = require('http');
26
//Load the http module.
27
​
28
var server = http.createServer(function (req, resp) {
29
    resp.writeHead(200, { 'content-type': 'application/json' });
30
    resp.end(JSON.stringify(MyData));
31
});
32
// Create a webserver with a request listener callback.  This will write the response header with the content type as json, and end the response by sending the MyData array in JSON format.
33
​
34
server.listen(8080);
35
// Tells the webserver to listen on port 8080(obviously this may be whatever port you want.)
Things to be aware of in your app.js code
In lines 7 through 11, we define the function called 'MyCSV' and the field names.

If your CSV file has multiple columns make sure you define this correctly to match your file.

On line 17 we define the location of the CSV file of which we are loading.  Make sure you use the correct path here.

启动您的应用并验证功能 打开控制台并键入以下命令:

node app

您应该在控制台中看到以下输出:

[  MYCSV { Fieldone: 'ABC', Fieldtwo: '123', Fieldthree: 'Fudge' },
   MYCSV { Fieldone: '532', Fieldtwo: 'CWE', Fieldthree: 'ICECREAM' },
   MYCSV { Fieldone: '8023', Fieldtwo: 'POOP', Fieldthree: 'DOGS' },
   MYCSV { Fieldone: '441', Fieldtwo: 'CHEESE', Fieldthree: 'CARMEL' },
   MYCSV { Fieldone: '221', Fieldtwo: 'ABC', Fieldthree: 'HOUSE' }, ]

1     [MYCSV {Fieldone:'ABC',Fieldtwo:'123',Fieldthree:'Fudge'},
2        MYCSV {Fieldone:'532',Fieldtwo:'CWE',Fieldthree:'ICECREAM'},
3        MYCSV {Fieldone:'8023',Fieldtwo:'POOP',Fieldthree:'DOGS'},
4        MYCSV {Fieldone:'441',Fieldtwo:'CHEESE',Fieldthree:'CARMEL'},
5        MYCSV {Fieldone:'221',Fieldtwo:'ABC',Fieldthree:'HOUSE'},]

现在您应该打开Web浏览器并导航到您的服务器。您应该看到它以JSON格式输出数据。

结论 使用node.js及其CSV模块,我们可以快速轻松地读取和使用存储在服务器上的数据,并根据请求将其提供给客户端