# Two six-row sample frames. The scalar 1 given for 'key2' is broadcast to
# every row, so both frames share a constant column to join on.
df = pd.DataFrame(
    {
        "A": [1, 2, 3, 1, 2, 3],
        "B": [10, 10, 11, 10, 10, 15],
        "key1": ["a", "b", "a", "b", "c", "c"],
        "key2": 1,
    }
)
df1 = pd.DataFrame(
    {
        "A": [1, 2, 3, 1, 2, 3],
        "B": [100, 100, 110, 100, 100, 150],
        "key1": ["a", "c", "b", "a", "a", "c"],
        "key2": 1,
    }
)

# Joining on the constant key pairs every row of df with every row of df1
# (6 x 6 = 36 rows); clashing column names receive the '_x' / '_y' suffixes.
dfn = df.merge(df1, on="key2")

# Bucket the merged rows by the key1 value that came from df1.
dfn_grouped = dfn.groupby("key1_y")
The output of list(dfn_grouped):
[('a', A_x B_x key1_x key2 A_y B_y key1_y
0 1 10 a 1 1 100 a
3 1 10 a 1 1 100 a
... ... ... ...
33 3 15 c 1 1 100 a
34 3 15 c 1 2 100 a),
('b', A_x B_x key1_x key2 A_y B_y key1_y
2 1 10 a 1 3 110 b
8 2 10 b 1 3 110 b
14 3 11 a 1 3 110 b
20 1 10 b 1 3 110 b
26 2 10 c 1 3 110 b
32 3 15 c 1 3 110 b),
('c', A_x B_x key1_x key2 A_y B_y key1_y
1 1 10 a 1 2 100 c
...... ... ....
35 3 15 c 1 3 150 c)]
现在我需要在每个 key1_y 分组内再按“key1_x”分组,并把每组的结果汇总成形如 {A_x: A_y} 的字典,例如:
key1_y key1_x A_X:A_Y
b a {'10':110,'11':110}
b b {'10':110}
b c {'10':110,'15':110}
// if A_x is already a key in the dict, append the new A_y to its list, e.g.:
// b e {'10':[11,12]}
答案 0 :(得分:1)
这是您需要的吗?:
# Build, for every (key1_y, key1_x) pair, a dict mapping each A_x value to the
# list of A_y values that occur with it. Expects `dfn` to be the merged frame
# with columns key1_y, key1_x, A_x and A_y.

# Step 1: one row per (key1_y, key1_x, A_x) holding the list of its A_y values.
grouped = dfn.groupby(['key1_y', 'key1_x', 'A_x'])
dfg = grouped['A_y'].apply(list).reset_index()
dfg.columns = ['key1_y', 'key1_x', 'A_x', 'dic_values']

# Step 2: wrap each row as a single-entry dict {A_x: [A_y, ...]}.
dfg['dic'] = [{a: b} for a, b in zip(dfg.A_x.values, dfg.dic_values.values)]
# Name the axis via `columns=`; a positional axis argument is rejected by
# recent pandas releases.
dfg.drop(columns=['A_x', 'dic_values'], inplace=True)

# Step 3: merge the single-entry dicts of each (key1_y, key1_x) pair into one.
# A comprehension is used because dict.items() returns a view on Python 3 and
# views cannot be concatenated with sum(..., []).
g_dics = dfg.groupby(['key1_y', 'key1_x'])[['dic']].apply(
    lambda x: {k: v for d in x.dic for k, v in d.items()}
)

# Final table: key1_y, key1_x and the merged dict for that pair.
pd.DataFrame(g_dics).reset_index()