将 JSON 转换为数据框(DataFrame)

时间:2021-07-29 12:36:54

标签: python pandas dataframe

我有一个 JSON 字符串,想在 Python 中把它转换为 pandas 数据框(DataFrame)。示例代码如下。

import json 
import pandas as pd  

# Raw JSON payload: a list of 8 rows, each row a list of 10
# {"value", "formattedValue"} cells.
# The text is written as adjacent string literals (one per row) inside
# parentheses, because a single-quoted literal cannot span several physical
# lines; Python joins the pieces at compile time.
data = (
    '['
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "amt", "formattedValue": "amt"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "-2716.5299999999997", "formattedValue": "-2,716.53"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Crds", "formattedValue": "Crds"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "-45526.209999999999", "formattedValue": "-45,526.21"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Dbts", "formattedValue": "Dbts"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "42809.679999999993", "formattedValue": "42,809.68"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Count", "formattedValue": "Count"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "205", "formattedValue": "205.00"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "amt", "formattedValue": "amt"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Crds", "formattedValue": "Crds"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Dbts", "formattedValue": "Dbts"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Count", "formattedValue": "Count"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "17", "formattedValue": "17.00"}]'
    ']'
)
data = json.loads(data)

# Column labels of the target frame; they are the measure names that appear
# in cell 4 of every row.
columnName = ['amt', 'Crds', 'Dbts', 'Count']

# Distinct (cell 0, cell 3) value pairs, de-duplicated through a set and then
# sorted. NOTE: the sort is what places '%null%' ahead of '5311', so the row
# order of the JSON is not preserved here.
note = sorted({(x[0]['value'], x[3]['value']) for x in data})

index = pd.MultiIndex.from_tuples(note)
# Only labels are supplied (no data), so every cell of the frame is NaN.
df = pd.DataFrame(columns=columnName, index=index)
print(df)

这段代码给出了以下输出。输出中各列没有被填充,行的顺序也与 JSON 中的不同(在 JSON 中,%null% 记录出现在 5311 记录之后)。

                  amt   Crds  Dbts  Count
Measure1  %null%  NaN   NaN   NaN   NaN
          5311    NaN   NaN   NaN   NaN

但我希望得到如下输出:各列填入对应的数值,并且行的顺序与 JSON 中的顺序完全一致。请问该如何实现?谢谢。

                  amt        Crds        Dbts       Count
Measure1  5311    -2,716.53  -45,526.21  42,809.68  205.00
          %null%  0.0        0.0         0.0        17

1 个答案:

答案 0 :(得分:2)

试试:

select sum(a.salary) as total_salary, a.id from q.staff a group by a.id

完整代码如下:

import json
import pandas as pd
from itertools import groupby

# Raw JSON payload: a list of 8 rows, each row a list of 10
# {"value", "formattedValue"} cells.
# The text is written as adjacent string literals (one per row) inside
# parentheses, because a single-quoted literal cannot span several physical
# lines; Python joins the pieces at compile time.
data = (
    '['
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "amt", "formattedValue": "amt"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "-2716.5299999999997", "formattedValue": "-2,716.53"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Crds", "formattedValue": "Crds"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "-45526.209999999999", "formattedValue": "-45,526.21"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Dbts", "formattedValue": "Dbts"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "42809.679999999993", "formattedValue": "42,809.68"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Count", "formattedValue": "Count"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "205", "formattedValue": "205.00"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "amt", "formattedValue": "amt"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Crds", "formattedValue": "Crds"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Dbts", "formattedValue": "Dbts"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], '
    '[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Count", "formattedValue": "Count"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "17", "formattedValue": "17.00"}]'
    ']'
)
data = json.loads(data)

# Build one record per run of consecutive rows sharing the same
# (cell 0, cell 2) values. groupby only merges *adjacent* rows and the input
# is deliberately left unsorted, so the records keep the order of the JSON.
all_data = []
for (lvl_0, lvl_1), rows in groupby(
    data, key=lambda row: (row[0]["value"], row[2]["value"])
):
    record = {"lvl_0": lvl_0, "lvl_1": lvl_1}
    for row in rows:
        # Cell 4 holds the measure name (the column label); the last cell
        # holds the number, taken here in its display-formatted string form.
        record[row[4]["value"]] = row[-1]["formattedValue"]
    all_data.append(record)

# The two level columns become a MultiIndex; their helper names are cleared
# so the printed frame shows no index header.
df = pd.DataFrame(all_data).set_index(["lvl_0", "lvl_1"])
df.index.names = [None, None]
print(df)
相关问题