我在下面的问题中问过类似的情况,并根据答案想出了解决方案,但我迷路了。 Checking for only column value to be present in dataframe
问题陈述如下-
我需要对列验证值进行分组,并进行查找,以使只有一组列具有值。
我分为三组:-
all_non_tiered_rates_blank = (df[['age_0', 'age_0_tobacco', 'age_1',
'age_1_tobacco', 'age_2', 'age_2_tobacco', 'age_3', 'age_3_tobacco', 'age_4',
'age_4_tobacco',
'age_5', 'age_5_tobacco', 'age_6', 'age_6_tobacco', 'age_7', 'age_7_tobacco',
'age_8',
'age_8_tobacco',
'age_9', 'age_9_tobacco', 'age_10', 'age_10_tobacco', 'age_11',
'age_11_tobacco', 'age_12',
'age_12_tobacco',
'age_13', 'age_13_tobacco', 'age_14', 'age_14_tobacco', 'age_15',
'age_15_tobacco',
'age_16', 'age_16_tobacco',
'age_17', 'age_17_tobacco', 'age_18', 'age_18_tobacco', 'age_19',
'age_19_tobacco',
'age_20', 'age_20_tobacco',
'age_21', 'age_21_tobacco', 'age_22', 'age_22_tobacco', 'age_23',
'age_23_tobacco',
'age_24', 'age_24_tobacco',
'age_25', 'age_25_tobacco', 'age_26', 'age_26_tobacco', 'age_27',
'age_27_tobacco',
'age_28', 'age_28_tobacco',
'age_29', 'age_29_tobacco', 'age_30', 'age_30_tobacco', 'age_31',
'age_31_tobacco',
'age_32', 'age_32_tobacco',
'age_33', 'age_33_tobacco', 'age_34', 'age_34_tobacco', 'age_35',
'age_35_tobacco',
'age_36', 'age_36_tobacco',
'age_37', 'age_37_tobacco', 'age_38', 'age_38_tobacco', 'age_39',
'age_39_tobacco',
'age_40', 'age_40_tobacco',
'age_41', 'age_41_tobacco', 'age_42', 'age_42_tobacco', 'age_43',
'age_43_tobacco',
'age_44', 'age_44_tobacco',
'age_45', 'age_45_tobacco', 'age_46', 'age_46_tobacco', 'age_47',
'age_47_tobacco',
'age_48', 'age_48_tobacco',
'age_49', 'age_49_tobacco', 'age_50', 'age_50_tobacco', 'age_51',
'age_51_tobacco',
'age_52', 'age_52_tobacco',
'age_53', 'age_53_tobacco', 'age_54', 'age_54_tobacco', 'age_55',
'age_55_tobacco',
'age_56', 'age_56_tobacco',
'age_57', 'age_57_tobacco', 'age_58', 'age_58_tobacco', 'age_59',
'age_59_tobacco',
'age_60', 'age_60_tobacco',
'age_61', 'age_61_tobacco', 'age_62', 'age_62_tobacco', 'age_63',
'age_63_tobacco',
'age_64', 'age_64_tobacco']].notna().all(axis=1))
第二组:-
all_tiered_rates_blank = (df[['single', 'single_and_spouse', 'single_and_children', 'family', 'child_only']].notna().all(axis=1))
第三组:-
fixed_price_check = ((df['fixed_price'].notna()) & (df['fixed_price'] >= 0))
在这些one
列组中只有3
应该有一个值,并返回True
。如果有多个群组返回True
,那么应该返回我False
。
这意味着:-all_non_tiered_rates_blank
,all_tiered_rates_blank
和fixed_price_check
之一可以是True
,否则是False
。
我该如何处理。
样本数据-:
hios_plan_identifier wellthie_issuer_identifier plan_year group_or_individual_plan_type data_sourced_from effective_start_date effective_end_date age_0 age_0_tobacco age_1 age_1_tobacco age_2 age_2_tobacco age_3 age_3_tobacco age_4 age_4_tobacco age_5 age_5_tobacco age_6 age_6_tobacco age_7 age_7_tobacco age_8 age_8_tobacco age_9 age_9_tobacco age_10 age_10_tobacco age_11 age_11_tobacco age_12 age_12_tobacco age_13 age_13_tobacco age_14 age_14_tobacco age_15 age_15_tobacco age_16 age_16_tobacco age_17 age_17_tobacco age_18 age_18_tobacco age_19 age_19_tobacco age_20 age_20_tobacco age_21 age_21_tobacco age_22 age_22_tobacco age_23 age_23_tobacco age_24 age_24_tobacco age_25 age_25_tobacco age_26 age_26_tobacco age_27 age_27_tobacco age_28 age_28_tobacco age_29 age_29_tobacco age_30 age_30_tobacco age_31 age_31_tobacco age_32 age_32_tobacco age_33 age_33_tobacco age_34 age_34_tobacco age_35 age_35_tobacco age_36 age_36_tobacco age_37 age_37_tobacco age_38 age_38_tobacco age_39 age_39_tobacco age_40 age_40_tobacco age_41 age_41_tobacco age_42 age_42_tobacco age_43 age_43_tobacco age_44 age_44_tobacco age_45 age_45_tobacco age_46 age_46_tobacco age_47 age_47_tobacco age_48 age_48_tobacco age_49 age_49_tobacco age_50 age_50_tobacco age_51 age_51_tobacco age_52 age_52_tobacco age_53 age_53_tobacco age_54 age_54_tobacco age_55 age_55_tobacco age_56 age_56_tobacco age_57 age_57_tobacco age_58 age_58_tobacco age_59 age_59_tobacco age_60 age_60_tobacco age_61 age_61_tobacco age_62 age_62_tobacco age_63 age_63_tobacco age_64 age_64_tobacco rating_zone_code county_fips fixed_price prices_indexed_by_quantity_of_children single single_and_spouse single_and_children family child_only issuer_id state_id medical_plan_id county_id rating_zone_id zip_code_county_id
0 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 396.23 431.45 431.45 444.92 444.92 458.39 458.39 472.89 472.89 487.39 487.39 502.41 502.41 517.95 517.95 517.95 517.95 517.95 517.95 517.95 517.95 520.02 520.02 530.38 530.38 542.81 542.81 563.01 563.01 579.59 579.59 587.87 587.87 600.30 600.30 612.73 612.73 620.50 620.50 628.79 628.79 632.93 632.93 637.08 637.08 641.22 641.22 645.37 645.37 653.65 653.65 661.94 661.94 674.37 674.37 686.28 686.28 702.86 702.86 723.58 723.58 747.92 747.92 776.93 776.93 809.56 809.56 846.85 846.85 883.62 883.62 925.06 925.06 965.98 965.98 1011.04 1011.04 1056.62 1056.62 1105.82 1105.82 1155.03 1155.03 1208.38 1208.38 1262.24 1262.24 1319.74 1319.74 1348.22 1348.22 1405.72 1405.72 1455.44 1455.44 1488.07 1488.07 1528.99 1528.99 1553.85 1553.85 GA01 06-013 NaN NaN NaN NaN NaN NaN NaN 484 17 1601286 6013 1135 None
1 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 470.93 512.79 512.79 528.79 528.79 544.80 544.80 562.03 562.03 579.27 579.27 597.12 597.12 615.59 615.59 615.59 615.59 615.59 615.59 615.59 615.59 618.05 618.05 630.36 630.36 645.14 645.14 669.15 669.15 688.85 688.85 698.69 698.69 713.47 713.47 728.24 728.24 737.48 737.48 747.33 747.33 752.25 752.25 757.18 757.18 762.10 762.10 767.03 767.03 776.87 776.87 786.72 786.72 801.50 801.50 815.66 815.66 835.36 835.36 859.98 859.98 888.91 888.91 923.39 923.39 962.17 962.17 1006.49 1006.49 1050.20 1050.20 1099.44 1099.44 1148.08 1148.08 1201.63 1201.63 1255.80 1255.80 1314.28 1314.28 1372.77 1372.77 1436.17 1436.17 1500.19 1500.19 1568.52 1568.52 1602.38 1602.38 1670.71 1670.71 1729.81 1729.81 1768.59 1768.59 1817.22 1817.22 1846.77 1846.77 GA01 06-013 NaN NaN 1.0 1.0 1.0 1.0 1.0 484 17 1601286 6013 1135 None
2 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 284.01 309.25 309.25 318.90 318.90 328.56 328.56 338.95 338.95 349.35 349.35 360.11 360.11 371.25 371.25 371.25 371.25 371.25 371.25 371.25 371.25 372.74 372.74 380.16 380.16 389.07 389.07 403.55 403.55 415.43 415.43 421.37 421.37 430.28 430.28 439.19 439.19 444.76 444.76 450.70 450.70 453.67 453.67 456.64 456.64 459.61 459.61 462.58 462.58 468.52 468.52 474.46 474.46 483.37 483.37 491.91 491.91 503.79 503.79 518.64 518.64 536.09 536.09 556.88 556.88 580.26 580.26 606.99 606.99 633.35 633.35 663.05 663.05 692.38 692.38 724.68 724.68 757.35 757.35 792.62 792.62 827.89 827.89 866.13 866.13 904.74 904.74 945.95 945.95 966.36 966.36 1007.57 1007.57 1043.21 1043.21 1066.60 1066.60 1095.93 1095.93 1113.75 1113.75 GA01 06-013 NaN 1.0 NaN NaN NaN NaN NaN 484 17 1601286 6013 1135 None
3 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GA01 06-013 1.0 NaN NaN NaN NaN NaN NaN 484 17 1601286 6013 1135 None
4 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 388.31 422.82 422.82 436.02 436.02 449.22 449.22 463.43 463.43 477.64 477.64 492.36 492.36 507.59 507.59 507.59 507.59 507.59 507.59 507.59 507.59 509.62 509.62 519.77 519.77 531.95 531.95 551.75 551.75 567.99 567.99 576.11 576.11 588.30 588.30 600.48 600.48 608.09 608.09 616.21 616.21 620.27 620.27 624.34 624.34 628.40 628.40 632.46 632.46 640.58 640.58 648.70 648.70 660.88 660.88 672.56 672.56 688.80 688.80 709.10 709.10 732.96 732.96 761.39 761.39 793.36 793.36 829.91 829.91 865.95 865.95 906.56 906.56 946.66 946.66 990.82 990.82 1035.48 1035.48 1083.70 1083.70 1131.93 1131.93 1184.21 1184.21 1237.00 1237.00 1293.34 1293.34 1321.26 1321.26 1377.60 1377.60 1426.33 1426.33 1458.31 1458.31 1498.41 1498.41 1522.77 1522.77 GA01 06-013 NaN NaN NaN NaN NaN NaN NaN 484 17 1601286 6013 1135 None
5 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 259.88 282.98 282.98 291.81 291.81 300.64 300.64 310.16 310.16 319.67 319.67 329.52 329.52 339.71 339.71 339.71 339.71 339.71 339.71 339.71 339.71 341.07 341.07 347.86 347.86 356.02 356.02 369.26 369.26 380.14 380.14 385.57 385.57 393.72 393.72 401.88 401.88 406.97 406.97 412.41 412.41 415.13 415.13 417.84 417.84 420.56 420.56 423.28 423.28 428.71 428.71 434.15 434.15 442.30 442.30 450.12 450.12 460.99 460.99 474.57 474.57 490.54 490.54 509.57 509.57 530.97 530.97 555.43 555.43 579.55 579.55 606.72 606.72 633.56 633.56 663.11 663.11 693.01 693.01 725.28 725.28 757.55 757.55 792.54 792.54 827.87 827.87 865.58 865.58 884.27 884.27 921.97 921.97 954.59 954.59 975.99 975.99 1002.82 1002.82 1019.13 1019.13 GA01 06-013 NaN NaN NaN NaN NaN NaN NaN 484 17 1601286 6013 1135 None
6 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 247.78 269.80 269.80 278.22 278.22 286.64 286.64 295.71 295.71 304.78 304.78 314.17 314.17 323.89 323.89 323.89 323.89 323.89 323.89 323.89 323.89 325.19 325.19 331.66 331.66 339.44 339.44 352.07 352.07 362.43 362.43 367.62 367.62 375.39 375.39 383.16 383.16 388.02 388.02 393.20 393.20 395.79 395.79 398.38 398.38 400.98 400.98 403.57 403.57 408.75 408.75 413.93 413.93 421.70 421.70 429.15 429.15 439.52 439.52 452.47 452.47 467.70 467.70 485.84 485.84 506.24 506.24 529.56 529.56 552.56 552.56 578.47 578.47 604.05 604.05 632.23 632.23 660.74 660.74 691.51 691.51 722.27 722.27 755.64 755.64 789.32 789.32 825.27 825.27 843.09 843.09 879.04 879.04 910.13 910.13 930.54 930.54 956.12 956.12 971.67 971.67 GA01 06-013 NaN NaN NaN NaN NaN NaN NaN 484 17 1601286 6013 1135 None
7 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN GA01 06-013 NaN NaN NaN NaN NaN NaN NaN 484 17 1601286 6013 1135 None
8 99806CAAVMQ-TMP UHC99806 2074 Group uhc 2018-10-01 2018-12-31 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 236.12 257.11 257.11 265.13 265.13 273.16 273.16 281.80 281.80 290.44 290.44 299.39 299.39 308.65 308.65 308.65 308.65 308.65 308.65 308.65 308.65 309.88 309.88 316.06 316.06 323.47 323.47 335.50 335.50 345.38 345.38 350.32 350.32 357.73 357.73 365.13 365.13 369.76 369.76 374.70 374.70 377.17 377.17 379.64 379.64 382.11 382.11 384.58 384.58 389.52 389.52 394.45 394.45 401.86 401.86 408.96 408.96 418.84 418.84 431.18 431.18 445.69 445.69 462.98 462.98 482.42 482.42 504.64 504.64 526.56 526.56 551.25 551.25 575.63 575.63 602.48 602.48 629.65 629.65 658.97 658.97 688.29 688.29 720.08 720.08 752.18 752.18 786.44 786.44 803.42 803.42 837.68 837.68 867.31 867.31 886.75 886.75 911.13 911.13 925.95 925.95 GA01 06-013 1.0 NaN NaN NaN NaN NaN NaN 484 17 1601286 6013 1135 None
答案 0 :(得分:1)
all_non_tiered_rates_blank = df[['age_0', 'age_0_tobacco', 'age_1', 'age_1_tobacco', ..... 'age_64', 'age_64_tobacco']]
all_non_tiered_rates_blank['count_notnull']=all_non_tiered_rates_blank.count(axis=1)
all_non_tiered_rates_blank['value_all_non_tiered'] = np.where(all_non_tiered_rates_blank['count_notnull']==(len(all_non_tiered_rates_blank.columns)-1),(len(all_non_tiered_rates_blank.columns)-1),np.NaN)
all_tiered_rates_blank = df[['single', 'single_and_spouse', 'single_and_children', 'family', 'child_only']]
all_tiered_rates_blank['count_notnull']=all_tiered_rates_blank.count(axis=1)
all_tiered_rates_blank['value_all_tiered'] = np.where(all_tiered_rates_blank['count_notnull']==(len(all_tiered_rates_blank.columns)-1),(len(all_tiered_rates_blank.columns)-1),np.NaN)
fixed_price_check = df['fixed_price']
prices_indexed_by_quantity_of_children_check = df['prices_indexed_by_quantity_of_children']
#Concatenate all the 4 DataFrames into one DataFrame df_final
df_final = pd.concat([all_non_tiered_rates_blank[['value_all_non_tiered']], all_tiered_rates_blank[['value_all_tiered']], fixed_price_check, prices_indexed_by_quantity_of_children_check], axis=1)
#Will give a count of non-NULLs.
df_final['count_notnull']=df_final.count(axis=1)
#df_bool is your boolean dataframe. It will be True if only 1 of the 4 DataFrames has a value, otherwise False
df_bool = df_final['count_notnull'].map(lambda x:x==1)