将Pandas Dataframe转换为自定义嵌套JSON

时间:2019-01-23 16:56:13

标签: python json pandas

我必须将Pandas Dataframe转换为嵌套的json。我尝试使用to_json,但是它将整个数据帧转换为键值对,不确定如何转换这样的嵌套json。任何帮助表示赞赏。

我的数据框:

df=pd.DataFrame({'id':[1,2,3,4,5,6,7],'name':['walmart','walmart dc','walmart supercenter','wal','walmart 5603','walmart#5603'
                                         ,'Sams walmart'],'Cluster_id':[123,123,123,123,123,123,123],
             'Cluster_name':['walmart','walmart','walmart','walmart','walmart','walmart','walmart'],'House_num':[123,456,789
                                                                                                                ,654,321,102,945]
            ,'Street':['Main Street','Main Street','Main Street','Main Street','Main Street','Main Street','Main Street'],
             'Cluster_Street':['Main Street','Main Street','Main Street','Main Street','Main Street','Main Street','Main Street'],
             'Cluster_House_Num':[456,456,456,456,456,456,456]
            })

enter image description here

输出JSON:

{
'cluster_id':123
'cluster_name':'walmart'
'address':{
    'House_num':456
    'Street': 'Main Street'
}
'records':[{
    'id':1
    'name':'walmart'
    'address':{
    'House_num':123
    'Street': 'Main Street'
}
},{
    'id':2
    'name':'walmart dc'
    'address':{
    'House_num':456
    'Street': 'Main Street'
}
},{
    'id':3
    'name':'walmart supercenter'
    'address':{
    'House_num':789
    'Street': 'Main Street'
}
},{
    'id':4
    'name':'wal'
    'address':{
    'House_num':654
    'Street': 'Main Street'
}
},{
    'id':5
    'name':'walmart 5603'
    'address':{
    'House_num':321
    'Street': 'Main Street'
}
},{
    'id':6
    'name':'walmart#5603'
    'address':{
    'House_num':102
    'Street': 'Main Street'
}
},{
    'id':7
    'name':'Sams walmart'
    'address':{
    'House_num':945
    'Street': 'Main Street'
}
}]

}

2 个答案:

答案 0 :(得分:1)

import pandas as pd

df=pd.DataFrame({'id':[1,2,3,4,5,6,7],'name':['walmart','walmart dc','walmart supercenter','wal','walmart 5603','walmart#5603'
                                             ,'Sams walmart'],'cluster_id':[123,123,123,123,123,123,123],
                 'cluster_name':['walmart','walmart','walmart','walmart','walmart','walmart','walmart'],'House_num':[456,456,456
                                                                                                                    ,456,456,456,456]
                ,'Street':['Main Street','Main Street','Main Street','Main Street','Main Street','Main Street','Main Street']
                })

df_cluster = df.groupby('cluster_id')
for cluster_id, group in df_cluster:

    records = []
    for row, data in group.iterrows():
        rec_dict = {'id':data[4],
                  'name':data[3],
                  'address':{
                      'House_num':data[0],
                      'Street': data[1]
                      }
                  }
        records.append(rec_dict)

    out_dict = {'cluster_id':cluster_id,'records':records}
    print (out_dict)

答案 1 :(得分:0)

dic = eval(df.to_json(orient="records"))

a = df.apply(pd.Series.nunique)
lst = list(a[a==1].index)      #getting columns with exactly 1 unique 
lst
['House_num', 'Street', 'cluster_id', 'cluster_name']

final_dic = dict()
for key in lst:
    val = dic[0][key]
    for i in dic:
        i.pop(key, None)
        final_dic[key] = val
final_dic["records"] = dic
final_dic["address"] = {"House_num":final_dic["House_num"],"street":final_dic["Street"]}
final_dic.pop("Street")
final_dic.pop("House_num")
final_dic

 {'address': {'House_num': 456, 'street': 'Main Street'},
 'cluster_id': 123,
 'cluster_name': 'walmart',
 'records': [{'id': 1, 'name': 'walmart'},
  {'id': 2, 'name': 'walmart dc'},
  {'id': 3, 'name': 'walmart supercenter'},
  {'id': 4, 'name': 'wal'},
  {'id': 5, 'name': 'walmart 5603'},
  {'id': 6, 'name': 'walmart#5603'},
  {'id': 7, 'name': 'Sams walmart'}]
 }