我正在处理一个大型数据框的合并,过程很棘手,但这里要问的只是其中一个很小的问题:在每个 A 分组内,用该组已有的 B 值填充 NaN(即把下面的 df 变成 df2)。用循环可以实现,但速度太慢……
import pandas as pd
import numpy as np
# Example input: "A" is the group key; "B" holds at most one known value
# per group, with every other entry missing.
df = pd.DataFrame(
    {
        "A": [1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4],
        "B": [
            np.nan, 10,
            np.nan, 20, np.nan,
            np.nan, np.nan, np.nan, np.nan,
            40, np.nan,
        ],
    }
)
In[1]: df
Out[1]:
A B
0 1 NaN
1 1 10.0
2 2 NaN
3 2 20.0
4 2 NaN
5 3 NaN
6 3 NaN
7 3 NaN
8 3 NaN
9 4 40.0
10 4 NaN
# Target frame: same "A" keys, with each group's known "B" value spread over
# the whole group; group 3 has no known value and stays NaN.
df2 = pd.DataFrame(
    {
        "A": [1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4],
        "B": [
            10, 10,
            20, 20, 20,
            np.nan, np.nan, np.nan, np.nan,
            40, 40,
        ],
    }
)
In[1]: df2
Out[18]:
A B
0 1 10.0
1 1 10.0
2 2 20.0
3 2 20.0
4 2 20.0
5 3 NaN
6 3 NaN
7 3 NaN
8 3 NaN
9 4 40.0
10 4 40.0
答案 0 :(得分:2)
如果我理解正确(IIUC),你可以这样做:
In [91]: df.groupby('A').apply(lambda x: x.ffill().bfill())
Out[91]:
A B
0 1 10.0
1 1 10.0
2 2 20.0
3 2 20.0
4 2 20.0
5 3 NaN
6 3 NaN
7 3 NaN
8 3 NaN
9 4 40.0
10 4 40.0
答案 1 :(得分:0)
如果每个分组中只有一个非空值,可以这样做:
# Build an A -> B lookup from the rows that have no missing values, then
# overwrite column B by mapping every A key through that lookup.
b_by_a = df.set_index('A').dropna()['B'].to_dict()
df['B'] = df['A'].map(b_by_a)
df
Out[717]:
A B
0 1 10.0
1 1 10.0
2 2 20.0
3 2 20.0
4 2 20.0
5 3 NaN
6 3 NaN
7 3 NaN
8 3 NaN
9 4 40.0
10 4 40.0