数据
pb = {"mark_up_id":{"0":"123","1":"456","2":"789","3":"111","4":"222"},"mark_up":{"0":1.2987,"1":1.5625,"2":1.3698,"3":1.3333,"4":1.4589}}
data = {"id":{"0":"K69","1":"K70","2":"K71","3":"K72","4":"K73","5":"K74","6":"K75","7":"K79","8":"K86","9":"K100"},"cost":{"0":29.74,"1":9.42,"2":9.42,"3":9.42,"4":9.48,"5":9.48,"6":24.36,"7":5.16,"8":9.8,"9":3.28},"mark_up_id":{"0":"123","1":"456","2":"789","3":"111","4":"222","5":"333","6":"444","7":"555","8":"666","9":"777"}}
pb = pd.DataFrame(data=pb).set_index('mark_up_id')
df = pd.DataFrame(data=data)
预期产量
test = df.join(pb, on='mark_up_id', how='left')
test['cost'].update(test['cost'] + test['mark_up'])
test.drop('mark_up',axis=1,inplace=True)
或..
df['cost'].update(df['mark_up_id'].map(pb['mark_up']) + df['cost'])
问题
是否有执行上述操作的函数,或者这是进行此类操作的最佳方法?
答案 0 :(得分:2)
我将使用您提出的第二种解决方案,或者更好的方法:
df['cost']=(df['mark_up_id'].map(pb['mark_up']) + df['cost']).fillna(df['cost'])
我认为使用更新可能会感到不舒服,因为它不会返回任何内容。
假设Series.fillna
更灵活。
我们也可以使用DataFrame.assign
为了继续处理分配返回的DataFrame。
df.assign( Cost=(df['mark_up_id'].map(pb['mark_up']) + df['cost']).fillna(df['cost']) )
使用join
方法的时间比较
%%timeit
df['cost']=(df['mark_up_id'].map(pb['mark_up']) + df['cost']).fillna(df['cost'])
#945 µs ± 46 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%%timeit
test = df.join(pb, on='mark_up_id', how='left')
test['cost'].update(test['cost'] + test['mark_up'])
test.drop('mark_up',axis=1,inplace=True)
#3.59 ms ± 137 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
慢。
%%timeit
df['cost'].update(df['mark_up_id'].map(pb['mark_up']) + df['cost'])
#985 µs ± 32.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)