我需要转换以下格式的df:
d = {
'A': ['a1', 'a1', 'a1', 'a1', 'a1', 'a1', 'a1', 'a2', 'a2', 'a2', 'a2', 'a2', 'a2', 'a2', 'a2'],
'B': ['b1', 'b1', 'b1', 'b1', 'b2', 'b2', 'b2', 'b3', 'b3', 'b3', 'b3', 'b3', 'b3', 'b4', 'b4', ],
'C': ['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'c11', 'c12', 'c13', 'c14', 'c15', ],
}
df = pd.DataFrame(d)
df
A B C
0 a1 b1 c1
1 a1 b1 c2
2 a1 b1 c3
3 a1 b1 c4
4 a1 b2 c5
5 a1 b2 c6
6 a1 b2 c7
7 a2 b3 c8
8 a2 b3 c9
9 a2 b3 c10
10 a2 b3 c11
11 a2 b3 c12
12 a2 b3 c13
13 a2 b4 c14
14 a2 b4 c15
以以下格式输入字典:
outDict = {
'a1': {
'b1': ['c1', 'c2', 'c3', 'c4'],
'b2': ['c5', 'c6', 'c7'],
},
'a2': {
'b3': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'b4': ['c14', 'c15'],
}
}
即A列中的值成为第一级键; B列二级键中的值和C列中的值的列表。
有指针吗?
答案 0 :(得分:4)
这是使用pivot_table
的另一种方式:
out = {k:v.dropna().to_dict() for k,v in
df.pivot_table('C','B','A',aggfunc=list).items()}
{'a1': {'b1': ['c1', 'c2', 'c3', 'c4'], 'b2': ['c5', 'c6', 'c7']},
'a2': {'b3': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'], 'b4': ['c14', 'c15']}}
答案 1 :(得分:3)
这有点长,我敢说:
new_dict = {k: v['C'] for k,v in df.groupby(['A', 'B'])
.agg(list).groupby(level=0)
.apply(lambda df: df.xs(df.name)
.to_dict()).to_dict().items()}
print(new_dict)
输出:
{
'a1': {
'b1': ['c1', 'c2', 'c3', 'c4'],
'b2': ['c5', 'c6', 'c7'],
},
'a2': {
'b3': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'b4': ['c14', 'c15'],
}
}
未包装:
>>> df.groupby(['A', 'B']).agg(list)
C
A B
a1 b1 [c1, c2, c3, c4]
b2 [c5, c6, c7]
a2 b3 [c8, c9, c10, c11, c12, c13]
b4 [c14, c15]
>>> df.groupby(['A', 'B']).agg(list).groupby(level=0).apply(lambda df: df.xs(df.name).to_dict())
# we groupby level 0 again, then call xs as aggregator function to access each key
# in level 0, and convert to dict
A
a1 {'C': {'b1': ['c1', 'c2', 'c3', 'c4'], 'b2': [...
a2 {'C': {'b3': ['c8', 'c9', 'c10', 'c11', 'c12',...
dtype: object
>>> df.groupby(['A', 'B']).agg(list).groupby(level=0).apply(lambda df: df.xs(df.name).to_dict()).to_dict()
{'a1': {'C': {'b1': ['c1', 'c2', 'c3', 'c4'], 'b2': ['c5', 'c6', 'c7']}},
'a2': {'C': {'b3': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'b4': ['c14', 'c15']}}}
# then just using dict comp to remove column name 'C'
答案 2 :(得分:2)
递归解决方案似乎很自然,并且可以用于任意数量的列。我们在最左边的列上groupby
,然后将其余的列递归转换为所需的格式。如果仅剩一列,则会返回一个列表。
def df2dict_rec(df):
if df.shape[1] == 1:
return df.values[:,0].tolist()
else:
return {k: df2dict_rec(df_k.iloc[:,1:]) for k, df_k in df.groupby(df.columns[0])}
res = df2dict_rec(df)
# {'a1': {'b1': ['c1', 'c2', 'c3', 'c4'], 'b2': ['c5', 'c6', 'c7']},
# 'a2': {'b3': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'], 'b4': ['c14', 'c15']}}
答案 3 :(得分:0)
欢迎任何增强功能
dictLevel1 = {}
dictLevel2 = {}
for b,c in zip(list(df['B']),list(df['C'])):
try:
dictLevel2[b].append(c)
except KeyError:
dictLevel2[b] = [c]
for a,b in zip(list(df['A']),list(df['B'])):
try:
dictLevel1[a].update({b:dictLevel2[b]})
except:
dictLevel1[a] = {b: dictLevel2[b]}
print(dictLevel1)
输出
{'a1': {'b1': ['c1', 'c2', 'c3', 'c4'], 'b2': ['c5', 'c6', 'c7']}, 'a2': {'b3': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'], 'b4': ['c14', 'c15']}}