我需要将大型CSV数据集转换为JSON,但输出应该是这样的JSON字典:
var products = {
"crystal": {
"description": "This is a crystal",
"price": "2.95"
},
"emerald": {
"description": "This is a emerald",
"price": "5.95"
}
};
我使用引用here的脚本来生成JSON:
var csv = require('csv')
var fs = require('fs')
var f = fs.createReadStream('Fielding.csv')
var w = fs.createWriteStream('out.txt')
w.write('[');
csv()
.from.stream(f, {columns:true})
.transform(function(row, index) {
return (index === 0 ? '' : ',\n') + JSON.stringify(row);
})
.to.stream(w, {columns: true, end: false})
.on('end', function() {
w.write(']');
w.end();
});
但是,该脚本的输出是以这种格式创建的:
[
{
"name": "crystal",
"description": "This is a crystal",
"price": "2.95"
},
{
"name": "emerald",
"description": "This is a emerald",
"price": "5.95"
}
]
如何修改脚本以获得所需的“字典”格式?
答案 0 :(得分:2)
您需要做的就是遍历数组并使用item.name
作为字典对象的键
var products ={};
data.forEach(function(item){
products[item.name] = item;
});
这会在项目中保留name
属性,但这不应该成为问题
答案 1 :(得分:2)
我发现csv parser库最有用:
var csvText=`status,path,name,ext,checksum,size,document_service_id,document_service_path,message
success,./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE1.txt,expE1.txt,txt,38441337865069eabae7754b29bb43e1,414984,8269f7e3-3221-49bb-bb5a-5796cf208fd1,/neuroinftest/20170215/expE1.txt,
success,./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE10.txt,expE10.txt,txt,f27e46979035706eb0aaf58c26e09585,368573,2c94ed19-29c9-4660-83cf-c2148c3d6f61,/neuroinftest/20170215/expE10.txt,
success,./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE2.txt,expE2.txt,txt,e1040d9546423c823944120de0e5c46c,333308,b3898f5d-1058-4cf3-acf9-76759117b810,/neuroinftest/20170215/expE2.txt,
`
var csv = require('csv');
csv.parse(csvText, {columns: true}, function(err, data){
console.log(JSON.stringify(data, null, 2));
});
在变量csvText
中,我有逗号分隔文件,第一行用作标题。我使用parse
函数,我传递{columns: true}
表示第一行有标题。回调函数中的第二个参数(data
)具有对象,其中键是标题,值是对应的csv单元格。我使用JSON.stringify
很好地打印它,结果对象看起来像这样(它把它放到一个数组中):
[
{
"status": "success",
"path": "./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE1.txt",
"name": "expE1.txt",
"ext": "txt",
"checksum": "38441337865069eabae7754b29bb43e1",
"size": "414984",
"document_service_id": "8269f7e3-3221-49bb-bb5a-5796cf208fd1",
"document_service_path": "/neuroinftest/20170215/expE1.txt",
"message": ""
},
{
"status": "success",
"path": "./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE10.txt",
"name": "expE10.txt",
"ext": "txt",
"checksum": "f27e46979035706eb0aaf58c26e09585",
"size": "368573",
"document_service_id": "2c94ed19-29c9-4660-83cf-c2148c3d6f61",
"document_service_path": "/neuroinftest/20170215/expE10.txt",
"message": ""
},
{
"status": "success",
"path": "./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE2.txt",
"name": "expE2.txt",
"ext": "txt",
"checksum": "e1040d9546423c823944120de0e5c46c",
"size": "333308",
"document_service_id": "b3898f5d-1058-4cf3-acf9-76759117b810",
"document_service_path": "/neuroinftest/20170215/expE2.txt",
"message": ""
}
]
UPD :可以使用reduce
轻松将此数组转换为您需要的对象:
var res_obj = data.reduce(function(acc, cur, i) {
acc[cur.name] = cur;
return acc;
}, {});
在我的情况下,我使用name属性作为键。确保它是独一无二的。
答案 2 :(得分:1)
我认为这样的事情会起作用:
var products_arr = [{"name":"crystal","description":"This is a crystal","price":"2.95"},
{"name":"emerald","description":"This is a emerald","price":"5.95"}]
var products = {};
for (var i = 0, l = products_arr.length ; i < l ; ++i) {
var x = products_arr[i];
var name = x.name
delete x.name; // deletes name property from JSON object
products[name] = x;
}
这将输出:
{
"crystal": {
"description": "This is a crystal",
"price": "2.95"
},
"emerald": {
"description": "This is a emerald",
"price": "5.95"
}
}
答案 3 :(得分:0)
如果您想修改特定代码,可以更改行
return (index === 0 ? '' : ',\n') + JSON.stringify(row);
到
var clonedRow = JSON.parse(JSON.stringify(row));
var key = clonedRow['name'];
delete clonedRow['name'];
var newRow = {};
newRow[key] = clonedRow;
return (index === 0 ? '' : ',\n') + JSON.stringify(newRow);
这会为每一行创建一个新对象,根据您的要求修改结构。
答案 4 :(得分:-1)
最好的办法是使用强大的csv解析器/转储器PapaParse。它支持流,各种字符串编码,标题行,并且速度很快。