将CSV文件转换为JSON字典?

时间:2015-10-28 21:01:03

标签: javascript json node.js csv

我需要将大型CSV数据集转换为JSON,但输出应该是这样的JSON字典:

// Desired result: a single object keyed by product name; each value holds
// that product's description and price.
var products = {
    "crystal": {
        "description": "This is a crystal",
        "price": "2.95"
    },
    "emerald": {
        "description": "This is a emerald",
        "price": "5.95"
    }
};

这就是CSV表的样子:(原文此处是一张表格截图,图片未能保留;从下文脚本的输出可以看出,各列依次为 name、description、price。)

我使用了别处引用的一个脚本(原文链接已丢失)来生成JSON:

// Stream Fielding.csv through the csv package and write the rows to out.txt
// as a hand-assembled JSON array.
var csv = require('csv')
var fs = require('fs')
var f = fs.createReadStream('Fielding.csv')
var w = fs.createWriteStream('out.txt')

// Open the JSON array by hand; the rows are appended to the stream one at a time.
w.write('[');

csv()
// NOTE(review): columns:true presumably makes each row an object keyed by the
// header names — confirm against the csv package documentation.
.from.stream(f, {columns:true})
.transform(function(row, index) {
    // Serialize the row; every row after the first is prefixed with ",\n" as the separator.
    return (index === 0 ? '' : ',\n') + JSON.stringify(row);
})
// NOTE(review): end:false presumably keeps `w` open after the last row so the
// closing bracket can still be written in the 'end' handler — confirm.
.to.stream(w, {columns: true, end: false})
.on('end', function() {
     // Close the JSON array, then finish the output stream.
     w.write(']');
     w.end();
 });

但是,该脚本的输出是以这种格式创建的:

[
    {
        "name": "crystal",
        "description": "This is a crystal",
        "price": "2.95"
    },
    {
        "name": "emerald",
        "description": "This is a emerald",
        "price": "5.95"
    }
]

如何修改脚本以获得所需的“字典”格式?

5 个答案:

答案 0 :(得分:2)

您需要做的就是遍历数组并使用item.name作为字典对象的键

// Dictionary of rows, looked up by each row's `name`.
var products = {};

// Stores one row under its own name; the row object itself is kept as the value.
function indexByName(entry) {
    products[entry.name] = entry;
}

data.forEach(indexByName);

这样每个条目中仍会保留name属性,但这通常不成问题。

答案 1 :(得分:2)

我发现csv parser库最有用:

// Sample CSV input: the first line is the header row, each following line is one record.
var csvText=`status,path,name,ext,checksum,size,document_service_id,document_service_path,message
success,./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE1.txt,expE1.txt,txt,38441337865069eabae7754b29bb43e1,414984,8269f7e3-3221-49bb-bb5a-5796cf208fd1,/neuroinftest/20170215/expE1.txt,
success,./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE10.txt,expE10.txt,txt,f27e46979035706eb0aaf58c26e09585,368573,2c94ed19-29c9-4660-83cf-c2148c3d6f61,/neuroinftest/20170215/expE10.txt,
success,./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE2.txt,expE2.txt,txt,e1040d9546423c823944120de0e5c46c,333308,b3898f5d-1058-4cf3-acf9-76759117b810,/neuroinftest/20170215/expE2.txt,
`;
var csv = require('csv');

// `columns: true` marks the first line as the header row, so every record is
// delivered as an object keyed by column name.
csv.parse(csvText, {columns: true}, function(err, data){
    // Fail loudly on a parse error instead of printing `undefined`.
    if (err) {
        throw err;
    }
    // Pretty-print the resulting array of row objects with 2-space indentation.
    console.log(JSON.stringify(data, null, 2));
});

变量csvText中保存的是逗号分隔的文本,其中第一行是表头。我调用parse函数并传入{columns: true},表示第一行是表头。回调函数的第二个参数(data)是一个对象数组:每个对象的键是表头,值是对应的CSV单元格内容。我用JSON.stringify将其格式化输出,结果如下(所有行被放在一个数组中):

[
  {
    "status": "success",
    "path": "./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE1.txt",
    "name": "expE1.txt",
    "ext": "txt",
    "checksum": "38441337865069eabae7754b29bb43e1",
    "size": "414984",
    "document_service_id": "8269f7e3-3221-49bb-bb5a-5796cf208fd1",
    "document_service_path": "/neuroinftest/20170215/expE1.txt",
    "message": ""
  },
  {
    "status": "success",
    "path": "./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE10.txt",
    "name": "expE10.txt",
    "ext": "txt",
    "checksum": "f27e46979035706eb0aaf58c26e09585",
    "size": "368573",
    "document_service_id": "2c94ed19-29c9-4660-83cf-c2148c3d6f61",
    "document_service_path": "/neuroinftest/20170215/expE10.txt",
    "message": ""
  },
  {
    "status": "success",
    "path": "./15-02-2017_17-11/d77c7886-ffe9-40f2-b2fe-e68410d07891//expE2.txt",
    "name": "expE2.txt",
    "ext": "txt",
    "checksum": "e1040d9546423c823944120de0e5c46c",
    "size": "333308",
    "document_service_id": "b3898f5d-1058-4cf3-acf9-76759117b810",
    "document_service_path": "/neuroinftest/20170215/expE2.txt",
    "message": ""
  }
]

UPD :可以使用reduce轻松将此数组转换为您需要的对象:

// Fold the parsed rows into one object keyed by each row's `name`;
// the row object itself (including `name`) is stored as the value.
var res_obj = data.reduce((byName, row) => {
  byName[row.name] = row;
  return byName;
}, {});

在我的例子中,我使用name属性作为键。请确保它的值是唯一的,否则后面的行会覆盖前面同名的行。

答案 2 :(得分:1)

我认为这样的事情会起作用:

// Rows as the CSV parser yields them: one flat object per product, including its name.
var products_arr = [{"name":"crystal","description":"This is a crystal","price":"2.95"},
 {"name":"emerald","description":"This is a emerald","price":"5.95"}];
// Dictionary keyed by product name; each value holds the remaining columns.
var products = {};

products_arr.forEach(function (row) {
    // Copy every column except `name` into a fresh object rather than calling
    // `delete` on the row, so the entries of products_arr are left untouched.
    var details = {};
    Object.keys(row).forEach(function (column) {
        if (column !== 'name') {
            details[column] = row[column];
        }
    });
    products[row.name] = details;
});

这将输出:

{
  "crystal": {
    "description": "This is a crystal",
    "price": "2.95"
  },
  "emerald": {
    "description": "This is a emerald",
    "price": "5.95"
  }
}

答案 3 :(得分:0)

如果您想修改这段特定的代码,可以把下面的第一行(原来的return语句)替换为其后的代码段:

 return (index === 0 ? '' : ',\n') + JSON.stringify(row);

 // Work on a JSON round-trip copy so the `row` object itself is not modified.
 var clonedRow = JSON.parse(JSON.stringify(row));
 // Take the name out to use as the key, and drop it from the value.
 var key = clonedRow['name'];
 delete clonedRow['name'];
 // Wrap the remaining fields in a single-key object: { <name>: { ...other fields } }.
 var newRow = {};
 newRow[key] = clonedRow;
 // NOTE(review): each row is serialized as its own {name: {...}} object, so together
 // with the '[' and ']' the original script writes, the output is an array of
 // single-key objects rather than one dictionary — confirm this is the intended shape.
 return (index === 0 ? '' : ',\n') + JSON.stringify(newRow);

这会为每一行创建一个新对象,根据您的要求修改结构。

答案 4 :(得分:-1)

最好的办法是使用强大的csv解析器/转储器PapaParse。它支持流,各种字符串编码,标题行,并且速度很快。