我有一个数据框和一个嵌套字典。我需要将字典中的键与数据框的列名称进行匹配,并追加一个包含所述字典的正确值的列。最终值在字典中为两个或三个。但是,此循环已经运行了两天。我认为我的循环效率不高。任何人都可以快速浏览一下这种语法并发现任何低效之处吗?
for index, row in df.iterrows():
for k1, v1 in model_4_factors.items():
for k2, v2 in v1.items():
if isinstance(v2, dict):
for k3, v3 in v2.items():
if k2 == row['RATING_CLASS_CODE'] and k3 == row['unit_value_model']:
df['value_factor_4'] = v3
elif k2 == row['RATING_CLASS_CODE'] and k3 == row['MVEH_CC_Model']:
df['cc_factor_4'] = v3
elif k2 == row['term_model'] and k3 == row['advanced_purchase_days_model']:
df['advanced_days_factor_4'] = v3
elif k2 == row['marital_status_model'] and k3 == row['Driver_Age_model']:
df['driver_age_factor_4'] = v3
else:
df['value_factor_4'] = None
else:
if row['mveh_pkg_typ_cd'] == k2:
df['mveh_pkg_typ_cd_factor_4'] = v2
elif row['CLded'] == k2:
df['CLded_factor_4'] = v2
elif row['unit_drv_exp_model'] == k2:
df['unit_drv_exp_model'] = v2
elif row['v_age_model'] == k2:
df['v_age_model_factor_4'] = v2
elif row['years_owned_model'] == k2:
df['years_owned_model_factor_4'] = v2
elif row['FIN_RESP_CD'] == k2:
df['FIN_RESP_CD_factor_4'] = v2
elif row['st_ad_moto_safety_course'] == k2:
df['st_ad_moto_safety_course_factor_4'] = v2
elif row['st_ai_instructor_course'] == k2:
df['st_ai_instructor_course_factor_4'] = v2
elif row['st_ci_instructor_course'] == k2:
df['st_ci_instructor_course_factor_4'] = v2
elif row['st_mf_moto_safety_course '] == k2:
df['st_mf_moto_safety_course_factor_4'] = v2
elif row['st_mi_instructor_course'] == k2:
df['st_mi_instructor_course_factor_4'] = v2
elif row['st_mc_moto_safety_course'] == k2:
df['st_mc_moto_safety_course_factor_4'] = v2
elif row['st_rc_moto_safety_course'] == k2:
df['st_rc_moto_safety_course_factor_4'] = v2
elif row['st_ri_instructor_course'] == k2:
df['st_ri_instructor_course_factor_4'] = v2
elif row['ds_pn_prior_insurance'] == k2:
df['ds_pn_prior_insurance_factor_4'] = v2
elif row['st_rc_moto_safety_course'] == k2:
df['st_rc_moto_safety_course_factor_4'] = v2
elif row['Loyalty'] == k2:
df['Loyalty_factor_4'] = v2
elif row['ds_mc_motorcycle_endorsement'] == k2:
df['ds_mc_motorcycle_endorsement_factor_4'] = v2
elif row['multi_unit_model2'] == k2:
df['multi_unit_model2_factor_4'] = v2
elif row['ds_ad_affinity'] == k2:
df['ds_ad_affinity_factor_4'] = v2
elif row['ds_ak_alliance'] == k2:
df['ds_ak_alliance_factor_4'] = v2
elif row['multi_policy_count_model'] == k2:
df['multi_policy_count_model_factor_4'] = v2
elif row['ds_fp_paid_in_full'] == k2:
df['ds_fp_paid_in_full_factor_4'] = v2
else:
pass
数据框大约有80万行。字典也是一个很大的代码块。下面的字典和DF中的示例。最后我要的是factors' is the dictionary I'm working with.
df_result`减去各列的顺序。
import pandas as pd
vn_value = {23000: 1, 30000: 2, 26000: 3}
g_cn_value = {2000:1, 3000: 2, 4000: 3}
la_value = {19000: .5, 20000: 1, 20500: 1.2}
unit_value_factors = {'VN':vn_value, 'G_CN':g_cn_value, 'LA':la_value}
package = {'VPN':.3, 'SPS':.5, 'LSS':.7, 'SCE':.8}
d_cc = {1700: 1.3, 1800:2.1}
E_cc = {1300:1.8, 1400:.6}
HD_cc = {1400:1.5, 1800:1.7, 1900:1.8}
cc_factors = {'D':d_cc, 'E':E_cc, 'HD':HD_cc}
factors = {'value_class':unit_value_factors, 'package_modifier':package, 'cc_class':cc_factors}
df_value_class = ['VN', 'VN', 'G_CN', 'LA']
df_value = [26000, 30000, 3000, 19000]
df_package = ['VPN', 'SPS', 'LSS', 'SCE']
df_cc_class = ['D', 'E', 'HD', 'HD']
df_cc = [1700, 1300, 1400, 1900]
value_factor = [3,2,2,.5]
package_factor = [.3, .5 ,.7, .8]
cc_factor = [1.3, 1.8, 1.5, 1.8]
df1 = pd.DataFrame({'value_class':df_value_class, 'value':df_value, 'package_modifier':df_package
, 'cc_class':df_cc_class, 'cc':df_cc, 'AI':df_sc_AI, 'MS':df_sc_MS})
df_result = pd.DataFrame({'value_class':df_value_class, 'value':df_value, 'value_answer':value_factor
, 'package_modifier':df_package, 'package_answer':package_factor, 'cc_class':df_cc_class
, 'cc':df_cc, 'cc_answer':cc_factor})
df_result['score'] = df_result['value_answer']*df_result['package_answer'] * df_result['cc_answer']
答案 0 :(得分:1)
这要快得多。
for column in columns:
for key, value in model_4_factors.items():
if column == key:
df[column+'_factor_4'] = df[key].map(value)