我有一个 JSON 字符串,想在 Python 中把它转换成 pandas 的 DataFrame。示例代码如下。
import json
import pandas as pd
data = '[[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "amt", "formattedValue": "amt"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "-2716.5299999999997", "formattedValue": "-2,716.53"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Crds", "formattedValue": "Crds"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "-45526.209999999999", "formattedValue": "-45,526.21"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Dbts", "formattedValue": "Dbts"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "42809.679999999993", "formattedValue": "42,809.68"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Count", "formattedValue": "Count"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "205", "formattedValue": "205.00"}],[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, 
{"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "amt", "formattedValue": "amt"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Crds", "formattedValue": "Crds"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Dbts", "formattedValue": "Dbts"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Count", "formattedValue": "Count"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "17", "formattedValue": "17.00"}]]'
# Parse the JSON text into a list of rows; each row is a list of
# {"value": ..., "formattedValue": ...} cells.
data = json.loads(data)
note = []
# NOTE(review): the original remark here said the Tableau column names carry
# extra spaces, but the names below contain none -- confirm against the export.
columnName = ['amt', 'Crds', 'Dbts', 'Count']
for x in data:
    # Index key: the cell-0 value paired with the cell-3 value of the row.
    tup = (x[0]['value'], x[3]['value'])
    note.append(tup)
# set() drops duplicate keys and sorted() orders them by string comparison,
# which is why "%null%" ends up ahead of "5311" instead of following the
# order of the JSON.
note = sorted(set(note))
index = pd.MultiIndex.from_tuples(note)
# Only labels are passed in (no data), so every cell of the frame is NaN.
df = pd.DataFrame(columns=columnName, index=index)
print(df)
这给出了以下输出。在输出中,列未填充,行顺序也与 json 不同。 (在 json 中 %null% 出现在 5311 记录之后)。
| | | amt | Crds | Dbts | Count |
|---|---|---|---|---|---|
| Measure1 | %null% | NaN | NaN | NaN | NaN |
| | 5311 | NaN | NaN | NaN | NaN |
但我想得到的是下面这样的输出:各列填入对应的数值,并且行的顺序与 JSON 中的顺序完全一致。请问该如何实现?谢谢。
| | | amt | Crds | Dbts | Count |
|---|---|---|---|---|---|
| Measure1 | 5311 | -2,716.53 | -45,526.21 | 42,809.68 | 205.00 |
| | %null% | 0.0 | 0.0 | 0.0 | 17 |
答案 0 :(得分:2)
试试下面的代码(用 `itertools.groupby` 按 JSON 中的出现顺序对相邻记录分组,最后打印得到的 DataFrame):
import json
import pandas as pd
from itertools import groupby
data = '[[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "amt", "formattedValue": "amt"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "-2716.5299999999997", "formattedValue": "-2,716.53"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Crds", "formattedValue": "Crds"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "-45526.209999999999", "formattedValue": "-45,526.21"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Dbts", "formattedValue": "Dbts"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "42809.679999999993", "formattedValue": "42,809.68"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "true", "formattedValue": "In"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "Count", "formattedValue": "Count"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "5311", "formattedValue": "5311"}, {"value": "205", "formattedValue": "205.00"}],[{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, 
{"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "amt", "formattedValue": "amt"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Crds", "formattedValue": "Crds"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Dbts", "formattedValue": "Dbts"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "0", "formattedValue": "0.00"}], [{"value": "Measure1", "formattedValue": "Measure1"}, {"value": "false", "formattedValue": "Out"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "Count", "formattedValue": "Count"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "%null%", "formattedValue": "Null"}, {"value": "17", "formattedValue": "17.00"}]]'
# Parse the JSON text into a list of rows; each row is a list of
# {"value": ..., "formattedValue": ...} cells.
data = json.loads(data)
all_data = []
# groupby() only merges *adjacent* rows that share a key, so the groups come
# out in the order they appear in the JSON -- no sorting is involved.
for (lvl_0, lvl_1), subl in groupby(data, key=lambda k: (k[0]["value"], k[2]["value"])):
    # One output record per (cell 0, cell 2) group; these two values later
    # become the two index levels.
    out = {"lvl_0": lvl_0, "lvl_1": lvl_1}
    for v in subl:
        # Cell 4 names the column; the last cell carries the display value.
        out[v[4]["value"]] = v[-1]["formattedValue"]
    all_data.append(out)
df = pd.DataFrame(all_data).set_index(["lvl_0", "lvl_1"])
# Clear the helper level names so they are not printed above the index.
df.index.names = [None, None]
print(df)