# Question code, kept exactly as posted: it tries to zero ValuePay for
# unpaid rows and for rows paid more than a day after the due date.
df=pd.read_csv('./data/reward_original_91011.csv')
# Index labels of the rows whose DatePay is missing.
ind0=df[df['DatePay'].isnull()].index
# NOTE(review): chained indexing -- df.ix[ind0] is evaluated first and the
# ['ValuePay']=0 assignment is applied to that intermediate object, which
# pandas may hand back as a copy, so df itself is not reliably updated.
# `.ix` was also deprecated in pandas 0.20 and removed in 1.0; the one-step
# form is df.loc[ind0, 'ValuePay'] = 0.
df.ix[ind0]['ValuePay']=0
# Index labels of the rows that do have a DatePay value.
ind1=df[~df['DatePay'].isnull()].index
# Convert both date columns from their CSV representation to datetimes.
df['DatePay'] = pd.to_datetime(df['DatePay'])
df['DateDue'] = pd.to_datetime(df['DateDue'])
# Among the paid rows, keep those whose DatePay - DateDue gap in hours is
# above 24. astype(int) drops the fractional hour before the comparison, so
# a gap must reach 25 whole hours to qualify.
ind2=df.ix[ind1][((df.ix[ind1]['DatePay']-df.ix[ind1]['DateDue'])/np.timedelta64(1,'h')).astype(int)>24].index
# NOTE(review): the surrounding ** ... ** appears to be the asker's bold
# markup highlighting this line, not Python syntax. The statement is the
# same chained-indexing assignment as the one on ind0 above.
**df.ix[ind2]['ValuePay']=0**
有没有人可以给我一些提示?为什么我可以成功修改上面的粗体代码?
答案 0 :(得分:0)
我认为你需要:
# Let read_csv parse both date columns so they arrive as datetimes.
df = pd.read_csv(
    './data/reward_original_91011.csv',
    parse_dates=['DatePay', 'DateDue'],
)
# True for the rows that have no payment date.
unpaid = df['DatePay'].isnull()
# A single .loc call selects rows and column together, so the assignment
# lands in df itself rather than in an intermediate object.
df.loc[unpaid, 'ValuePay'] = 0
# Gap between payment and due date, expressed in hours.
hours_late = (df['DatePay'] - df['DateDue']) / np.timedelta64(1, 'h')
# True for the rows paid more than 24 hours after the due date.
late = hours_late > 24
# Paid rows that were late also get ValuePay = 0.
df.loc[~unpaid & late, 'ValuePay'] = 0
示例：
# Small demo frame: row 0 has no payment date, row 1 was paid two days
# before the due date, rows 2 and 3 were paid four and two days after it.
d = {
    'DateDue': [
        pd.Timestamp('2013-10-06 00:00:00'),
        pd.Timestamp('2013-10-08 00:00:00'),
        pd.Timestamp('2013-10-08 00:00:00'),
        pd.Timestamp('2013-10-08 00:00:00'),
    ],
    'DatePay': [
        # np.nan rather than np.NaN: the capitalised alias was removed in
        # NumPy 2.0 and raises AttributeError there.
        np.nan,
        pd.Timestamp('2013-10-06 00:00:00'),
        pd.Timestamp('2013-10-12 00:00:00'),
        pd.Timestamp('2013-10-10 00:00:00'),
    ],
}
df = pd.DataFrame(d)
print(df)
DateDue DatePay
0 2013-10-06 NaT
1 2013-10-08 2013-10-06
2 2013-10-08 2013-10-12
3 2013-10-08 2013-10-10
# True for the rows that have no payment date.
unpaid = df['DatePay'].isnull()
# Assigning through .loc creates the ValuePay column if it is absent and
# sets it to 0 on the unpaid rows only.
df.loc[unpaid, 'ValuePay'] = 0
# Gap between payment and due date, expressed in hours.
hours_late = (df['DatePay'] - df['DateDue']) / np.timedelta64(1, 'h')
# True for the rows paid more than 24 hours after the due date.
late = hours_late > 24
# Paid rows that were late also get ValuePay = 0.
df.loc[~unpaid & late, 'ValuePay'] = 0
print(df)
DateDue DatePay ValuePay
0 2013-10-06 NaT 0.0
1 2013-10-08 2013-10-06 NaN
2 2013-10-08 2013-10-12 0.0
3 2013-10-08 2013-10-10 0.0