嗨所以我正在处理一个从数据库中读取大量数据的文件。然后我在nodejs中使用一个名为json2csv的模块,它转换数据集并将其写入csv文件。这是json2csv模块的链接。 https://www.npmjs.com/package/json2csv 使用这个模块,它将数据转换为csv数据,但我遇到的问题是,当它开始写入文件时,由于数据集太大,它会耗尽内存并崩溃。有什么方法可以写一小块数据而不是写所有数据?这是我到目前为止所拥有的。
/*require the ibm_db module*/
var ibmdb = require('ibm_db');
var fs = require("fs");
var json2csv = require('json2csv');
var db2query = "SELECT DISTINCT DISCEVNT.EVENT_RUNID, DISCEVNT.EVENT_TIME, DISCEVNT.EVENT_SEVERITY, DISCEVNT.EVENT_ATTRIBUTE, DISCEVNT.EVENT_DESC, DISCEVNT_ATTR_IP.DE_IP, DISCEVNT_ATTR_IP.DE_HOSTNAME, DISCEVNT_ATTR_IP.DE_FQDN, DISCRUN.SERVERID_X FROM (DB2INST2.DISCEVNT DISCEVNT INNER JOIN DB2INST2.DISCRUN DISCRUN ON (DISCEVNT.EVENT_RUNID = DISCRUN.DISCOVERRUNID_X)) INNER JOIN DB2INST2.DISCEVNT_ATTR_IP DISCEVNT_ATTR_IP ON (DISCEVNT_ATTR_IP.DE_ATTR = DISCEVNT.EVENT_ATTRIBUTE) WHERE (DISCEVNT.EVENT_RUNID BETWEEN 2017071400000000 AND 2017071900000000) ORDER BY DISCEVNT.EVENT_TIME ASC;";
var db2setup = "DRIVER={DB2};DATABASE=CMDB;UID=user;PWD=password!;HOSTNAME=localhost;port=3000";
var dataJson = [];
var fields = ['EVENT_RUNID','EVENT_TIME','EVENT_SEVERITY','EVENT_ATTRIBUTE','EVENT_DESC','DE_IP','DE_HOSTNAME','DE_FQDN','SERVERID_X'];
console.log("Test program to access DB2 sample database");
/*Connect to the database server
param 1: The DSN string which has the details of database name to connect to, user id, password, hostname, portnumber
param 2: The Callback function to execute when connection attempt to the specified database is completed
*/
function getData(values){
dataJson = values;
console.log(dataJson);
var csv = json2csv({ data: dataJson, fields: fields });
console.log("CSV data:");
console.log(csv[0]);
console.log(csv.length);
console.log(dataJson.length);
console.log(dataJson[0]);
fs.writeFile('C:\\Users\\ztaddmusr\\Desktop\\CSV FILES\\file.csv', csv, function(err) {
if (err) throw err;
console.log('file saved');
});
}
ibmdb.open(db2setup, function(err, conn)
{
if(err) {
/*
On error in connection, log the error message on console
*/
console.error("error: ", err.message);
} else {
/*
On successful connection issue the SQL query by calling the query() function on Database
param 1: The SQL query to be issued
param 2: The callback function to execute when the database server responds
*/
conn.query(db2query, function(err, data) {
if(err) {
/*
On error in connection, log the error message on console
*/
console.error("error: ", err.message);
} else {
//console.log(data);
getData(data);
console.log('Data finished retrieving from the datatbase. Now will write to file.')
}
/*
Close the connection to the database
param 1: The callback function to execute on completion of close function.
*/
conn.close(function(){
console.log("Connection Closed");
});
});
}
});
//var csv = json2csv({ data: dataJson, fields: fields });
//console.log(csv);
//fs.writeFile('file.csv', csv, function(err) {
// if (err) throw err;
// console.log('file saved');
//});
我一直在尝试使用循环,但一直搞乱。提前感谢您的帮助!
答案 0 :(得分:0)
如果将ES6与co(npm包)等实现一起使用,则可以使用生成器。然后,您可以根据分区暂停异步代码的执行,并使用块附加到文件。
像这样的东西(没有编译它;可能有错误):
function* writeToCsv(csvObj){
let chunkedArr = chunkify(csvObj);
chunkedArr.forEach(co.wrap(chunk => {
co.wrap(writeChunkToCsv(chunk))(dir)
}))
}
function* writeChunkToCsv(chunk){
return dir => {
yield fs.writeFile(dir, csv);
}
}
function chunkify(arr, partitions){
let ansArr = [];
for(let i = 0, currentArr = []; i < arr.length; i++){
if(i%(arr.length/partitions) === 0 || i === arr.length){
ansArr = ansArr.push([...currentArr]);
currentArr = [];
}
else{
currentArr.push(arr[i]);
}
}
return ansArr;
}