检查数据框中仅存在一组列值

时间:2018-12-27 14:24:25

标签: python pandas dataframe

我在下面的问题中问过类似的情况,并根据答案想出了解决方案,但我迷路了。 Checking for only column value to be present in dataframe

问题陈述如下-

我需要对列验证值进行分组,并进行查找,以使只有一组列具有值。

我分为三组:-

all_non_tiered_rates_blank = (df[['age_0', 'age_0_tobacco', 'age_1',
                                          'age_1_tobacco', 'age_2', 'age_2_tobacco', 'age_3', 'age_3_tobacco', 'age_4',
                                          'age_4_tobacco',
                                          'age_5', 'age_5_tobacco', 'age_6', 'age_6_tobacco', 'age_7', 'age_7_tobacco',
                                          'age_8',
                                          'age_8_tobacco',
                                          'age_9', 'age_9_tobacco', 'age_10', 'age_10_tobacco', 'age_11',
                                          'age_11_tobacco', 'age_12',
                                          'age_12_tobacco',
                                          'age_13', 'age_13_tobacco', 'age_14', 'age_14_tobacco', 'age_15',
                                          'age_15_tobacco',
                                          'age_16', 'age_16_tobacco',
                                          'age_17', 'age_17_tobacco', 'age_18', 'age_18_tobacco', 'age_19',
                                          'age_19_tobacco',
                                          'age_20', 'age_20_tobacco',
                                          'age_21', 'age_21_tobacco', 'age_22', 'age_22_tobacco', 'age_23',
                                          'age_23_tobacco',
                                          'age_24', 'age_24_tobacco',
                                          'age_25', 'age_25_tobacco', 'age_26', 'age_26_tobacco', 'age_27',
                                          'age_27_tobacco',
                                          'age_28', 'age_28_tobacco',
                                          'age_29', 'age_29_tobacco', 'age_30', 'age_30_tobacco', 'age_31',
                                          'age_31_tobacco',
                                          'age_32', 'age_32_tobacco',
                                          'age_33', 'age_33_tobacco', 'age_34', 'age_34_tobacco', 'age_35',
                                          'age_35_tobacco',
                                          'age_36', 'age_36_tobacco',
                                          'age_37', 'age_37_tobacco', 'age_38', 'age_38_tobacco', 'age_39',
                                          'age_39_tobacco',
                                          'age_40', 'age_40_tobacco',
                                          'age_41', 'age_41_tobacco', 'age_42', 'age_42_tobacco', 'age_43',
                                          'age_43_tobacco',
                                          'age_44', 'age_44_tobacco',
                                          'age_45', 'age_45_tobacco', 'age_46', 'age_46_tobacco', 'age_47',
                                          'age_47_tobacco',
                                          'age_48', 'age_48_tobacco',
                                          'age_49', 'age_49_tobacco', 'age_50', 'age_50_tobacco', 'age_51',
                                          'age_51_tobacco',
                                          'age_52', 'age_52_tobacco',
                                          'age_53', 'age_53_tobacco', 'age_54', 'age_54_tobacco', 'age_55',
                                          'age_55_tobacco',
                                          'age_56', 'age_56_tobacco',
                                          'age_57', 'age_57_tobacco', 'age_58', 'age_58_tobacco', 'age_59',
                                          'age_59_tobacco',
                                          'age_60', 'age_60_tobacco',
                                          'age_61', 'age_61_tobacco', 'age_62', 'age_62_tobacco', 'age_63',
                                          'age_63_tobacco',
                                          'age_64', 'age_64_tobacco']].notna().all(axis=1))

第二组:-

all_tiered_rates_blank = (df[['single', 'single_and_spouse', 'single_and_children', 'family', 'child_only']].notna().all(axis=1))

第三组:-

fixed_price_check = ((df['fixed_price'].notna()) & (df['fixed_price'] >= 0))

在这些one列组中只有3应该有一个值,并返回True。如果有多个群组返回True,那么应该返回我False

这意味着:-all_non_tiered_rates_blankall_tiered_rates_blankfixed_price_check之一可以是True,否则是False

我该如何处理。

样本数据-:

 hios_plan_identifier wellthie_issuer_identifier  plan_year group_or_individual_plan_type data_sourced_from effective_start_date effective_end_date   age_0  age_0_tobacco   age_1  age_1_tobacco   age_2  age_2_tobacco   age_3  age_3_tobacco   age_4  age_4_tobacco   age_5  age_5_tobacco   age_6  age_6_tobacco   age_7  age_7_tobacco   age_8  age_8_tobacco   age_9  age_9_tobacco  age_10  age_10_tobacco  age_11  age_11_tobacco  age_12  age_12_tobacco  age_13  age_13_tobacco  age_14  age_14_tobacco  age_15  age_15_tobacco  age_16  age_16_tobacco  age_17  age_17_tobacco  age_18  age_18_tobacco  age_19  age_19_tobacco  age_20  age_20_tobacco  age_21  age_21_tobacco  age_22  age_22_tobacco  age_23  age_23_tobacco  age_24  age_24_tobacco  age_25  age_25_tobacco  age_26  age_26_tobacco  age_27  age_27_tobacco  age_28  age_28_tobacco  age_29  age_29_tobacco  age_30  age_30_tobacco  age_31  age_31_tobacco  age_32  age_32_tobacco  age_33  age_33_tobacco  age_34  age_34_tobacco  age_35  age_35_tobacco  age_36  age_36_tobacco  age_37  age_37_tobacco  age_38  age_38_tobacco  age_39  age_39_tobacco  age_40  age_40_tobacco  age_41  age_41_tobacco  age_42  age_42_tobacco  age_43  age_43_tobacco  age_44  age_44_tobacco  age_45  age_45_tobacco  age_46  age_46_tobacco  age_47  age_47_tobacco   age_48  age_48_tobacco   age_49  age_49_tobacco   age_50  age_50_tobacco   age_51  age_51_tobacco   age_52  age_52_tobacco   age_53  age_53_tobacco   age_54  age_54_tobacco   age_55  age_55_tobacco   age_56  age_56_tobacco   age_57  age_57_tobacco   age_58  age_58_tobacco   age_59  age_59_tobacco   age_60  age_60_tobacco   age_61  age_61_tobacco   age_62  age_62_tobacco   age_63  age_63_tobacco   age_64  age_64_tobacco rating_zone_code county_fips  fixed_price  prices_indexed_by_quantity_of_children  single  single_and_spouse  single_and_children  family  child_only  issuer_id  state_id  medical_plan_id  county_id  rating_zone_id zip_code_county_id
0      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31  396.23         396.23  396.23         396.23  396.23         396.23  396.23         396.23  396.23         396.23  396.23         396.23  396.23         396.23  396.23         396.23  396.23         396.23  396.23         396.23  396.23          396.23  396.23          396.23  396.23          396.23  396.23          396.23  396.23          396.23  431.45          431.45  444.92          444.92  458.39          458.39  472.89          472.89  487.39          487.39  502.41          502.41  517.95          517.95  517.95          517.95  517.95          517.95  517.95          517.95  520.02          520.02  530.38          530.38  542.81          542.81  563.01          563.01  579.59          579.59  587.87          587.87  600.30          600.30  612.73          612.73  620.50          620.50  628.79          628.79  632.93          632.93  637.08          637.08  641.22          641.22  645.37          645.37  653.65          653.65  661.94          661.94  674.37          674.37  686.28          686.28  702.86          702.86  723.58          723.58  747.92          747.92  776.93          776.93  809.56          809.56   846.85          846.85   883.62          883.62   925.06          925.06   965.98          965.98  1011.04         1011.04  1056.62         1056.62  1105.82         1105.82  1155.03         1155.03  1208.38         1208.38  1262.24         1262.24  1319.74         1319.74  1348.22         1348.22  1405.72         1405.72  1455.44         1455.44  1488.07         1488.07  1528.99         1528.99  1553.85         1553.85             GA01      06-013          NaN                                     NaN     NaN                NaN                  NaN     NaN         NaN        484        17          1601286       6013            1135               None
1      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31  470.93         470.93  470.93         470.93  470.93         470.93  470.93         470.93  470.93         470.93  470.93         470.93  470.93         470.93  470.93         470.93  470.93         470.93  470.93         470.93  470.93          470.93  470.93          470.93  470.93          470.93  470.93          470.93  470.93          470.93  512.79          512.79  528.79          528.79  544.80          544.80  562.03          562.03  579.27          579.27  597.12          597.12  615.59          615.59  615.59          615.59  615.59          615.59  615.59          615.59  618.05          618.05  630.36          630.36  645.14          645.14  669.15          669.15  688.85          688.85  698.69          698.69  713.47          713.47  728.24          728.24  737.48          737.48  747.33          747.33  752.25          752.25  757.18          757.18  762.10          762.10  767.03          767.03  776.87          776.87  786.72          786.72  801.50          801.50  815.66          815.66  835.36          835.36  859.98          859.98  888.91          888.91  923.39          923.39  962.17          962.17  1006.49         1006.49  1050.20         1050.20  1099.44         1099.44  1148.08         1148.08  1201.63         1201.63  1255.80         1255.80  1314.28         1314.28  1372.77         1372.77  1436.17         1436.17  1500.19         1500.19  1568.52         1568.52  1602.38         1602.38  1670.71         1670.71  1729.81         1729.81  1768.59         1768.59  1817.22         1817.22  1846.77         1846.77             GA01      06-013          NaN                                     NaN     1.0                1.0                  1.0     1.0         1.0        484        17          1601286       6013            1135               None
2      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31  284.01         284.01  284.01         284.01  284.01         284.01  284.01         284.01  284.01         284.01  284.01         284.01  284.01         284.01  284.01         284.01  284.01         284.01  284.01         284.01  284.01          284.01  284.01          284.01  284.01          284.01  284.01          284.01  284.01          284.01  309.25          309.25  318.90          318.90  328.56          328.56  338.95          338.95  349.35          349.35  360.11          360.11  371.25          371.25  371.25          371.25  371.25          371.25  371.25          371.25  372.74          372.74  380.16          380.16  389.07          389.07  403.55          403.55  415.43          415.43  421.37          421.37  430.28          430.28  439.19          439.19  444.76          444.76  450.70          450.70  453.67          453.67  456.64          456.64  459.61          459.61  462.58          462.58  468.52          468.52  474.46          474.46  483.37          483.37  491.91          491.91  503.79          503.79  518.64          518.64  536.09          536.09  556.88          556.88  580.26          580.26   606.99          606.99   633.35          633.35   663.05          663.05   692.38          692.38   724.68          724.68   757.35          757.35   792.62          792.62   827.89          827.89   866.13          866.13   904.74          904.74   945.95          945.95   966.36          966.36  1007.57         1007.57  1043.21         1043.21  1066.60         1066.60  1095.93         1095.93  1113.75         1113.75             GA01      06-013          NaN                                     1.0     NaN                NaN                  NaN     NaN         NaN        484        17          1601286       6013            1135               None
3      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN             GA01      06-013          1.0                                     NaN     NaN                NaN                  NaN     NaN         NaN        484        17          1601286       6013            1135               None
4      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31  388.31         388.31  388.31         388.31  388.31         388.31  388.31         388.31  388.31         388.31  388.31         388.31  388.31         388.31  388.31         388.31  388.31         388.31  388.31         388.31  388.31          388.31  388.31          388.31  388.31          388.31  388.31          388.31  388.31          388.31  422.82          422.82  436.02          436.02  449.22          449.22  463.43          463.43  477.64          477.64  492.36          492.36  507.59          507.59  507.59          507.59  507.59          507.59  507.59          507.59  509.62          509.62  519.77          519.77  531.95          531.95  551.75          551.75  567.99          567.99  576.11          576.11  588.30          588.30  600.48          600.48  608.09          608.09  616.21          616.21  620.27          620.27  624.34          624.34  628.40          628.40  632.46          632.46  640.58          640.58  648.70          648.70  660.88          660.88  672.56          672.56  688.80          688.80  709.10          709.10  732.96          732.96  761.39          761.39  793.36          793.36   829.91          829.91   865.95          865.95   906.56          906.56   946.66          946.66   990.82          990.82  1035.48         1035.48  1083.70         1083.70  1131.93         1131.93  1184.21         1184.21  1237.00         1237.00  1293.34         1293.34  1321.26         1321.26  1377.60         1377.60  1426.33         1426.33  1458.31         1458.31  1498.41         1498.41  1522.77         1522.77             GA01      06-013          NaN                                     NaN     NaN                NaN                  NaN     NaN         NaN        484        17          1601286       6013            1135               None
5      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31  259.88         259.88  259.88         259.88  259.88         259.88  259.88         259.88  259.88         259.88  259.88         259.88  259.88         259.88  259.88         259.88  259.88         259.88  259.88         259.88  259.88          259.88  259.88          259.88  259.88          259.88  259.88          259.88  259.88          259.88  282.98          282.98  291.81          291.81  300.64          300.64  310.16          310.16  319.67          319.67  329.52          329.52  339.71          339.71  339.71          339.71  339.71          339.71  339.71          339.71  341.07          341.07  347.86          347.86  356.02          356.02  369.26          369.26  380.14          380.14  385.57          385.57  393.72          393.72  401.88          401.88  406.97          406.97  412.41          412.41  415.13          415.13  417.84          417.84  420.56          420.56  423.28          423.28  428.71          428.71  434.15          434.15  442.30          442.30  450.12          450.12  460.99          460.99  474.57          474.57  490.54          490.54  509.57          509.57  530.97          530.97   555.43          555.43   579.55          579.55   606.72          606.72   633.56          633.56   663.11          663.11   693.01          693.01   725.28          725.28   757.55          757.55   792.54          792.54   827.87          827.87   865.58          865.58   884.27          884.27   921.97          921.97   954.59          954.59   975.99          975.99  1002.82         1002.82  1019.13         1019.13             GA01      06-013          NaN                                     NaN     NaN                NaN                  NaN     NaN         NaN        484        17          1601286       6013            1135               None
6      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31  247.78         247.78  247.78         247.78  247.78         247.78  247.78         247.78  247.78         247.78  247.78         247.78  247.78         247.78  247.78         247.78  247.78         247.78  247.78         247.78  247.78          247.78  247.78          247.78  247.78          247.78  247.78          247.78  247.78          247.78  269.80          269.80  278.22          278.22  286.64          286.64  295.71          295.71  304.78          304.78  314.17          314.17  323.89          323.89  323.89          323.89  323.89          323.89  323.89          323.89  325.19          325.19  331.66          331.66  339.44          339.44  352.07          352.07  362.43          362.43  367.62          367.62  375.39          375.39  383.16          383.16  388.02          388.02  393.20          393.20  395.79          395.79  398.38          398.38  400.98          400.98  403.57          403.57  408.75          408.75  413.93          413.93  421.70          421.70  429.15          429.15  439.52          439.52  452.47          452.47  467.70          467.70  485.84          485.84  506.24          506.24   529.56          529.56   552.56          552.56   578.47          578.47   604.05          604.05   632.23          632.23   660.74          660.74   691.51          691.51   722.27          722.27   755.64          755.64   789.32          789.32   825.27          825.27   843.09          843.09   879.04          879.04   910.13          910.13   930.54          930.54   956.12          956.12   971.67          971.67             GA01      06-013          NaN                                     NaN     NaN                NaN                  NaN     NaN         NaN        484        17          1601286       6013            1135               None
7      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN            NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN     NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN      NaN             NaN             GA01      06-013          NaN                                     NaN     NaN                NaN                  NaN     NaN         NaN        484        17          1601286       6013            1135               None
8      99806CAAVMQ-TMP                   UHC99806       2074                         Group               uhc           2018-10-01         2018-12-31  236.12         236.12  236.12         236.12  236.12         236.12  236.12         236.12  236.12         236.12  236.12         236.12  236.12         236.12  236.12         236.12  236.12         236.12  236.12         236.12  236.12          236.12  236.12          236.12  236.12          236.12  236.12          236.12  236.12          236.12  257.11          257.11  265.13          265.13  273.16          273.16  281.80          281.80  290.44          290.44  299.39          299.39  308.65          308.65  308.65          308.65  308.65          308.65  308.65          308.65  309.88          309.88  316.06          316.06  323.47          323.47  335.50          335.50  345.38          345.38  350.32          350.32  357.73          357.73  365.13          365.13  369.76          369.76  374.70          374.70  377.17          377.17  379.64          379.64  382.11          382.11  384.58          384.58  389.52          389.52  394.45          394.45  401.86          401.86  408.96          408.96  418.84          418.84  431.18          431.18  445.69          445.69  462.98          462.98  482.42          482.42   504.64          504.64   526.56          526.56   551.25          551.25   575.63          575.63   602.48          602.48   629.65          629.65   658.97          658.97   688.29          688.29   720.08          720.08   752.18          752.18   786.44          786.44   803.42          803.42   837.68          837.68   867.31          867.31   886.75          886.75   911.13          911.13   925.95          925.95             GA01      06-013          1.0                                     NaN     NaN                NaN                  NaN     NaN         NaN        484        17          1601286       6013            1135               None

1 个答案:

答案 0 :(得分:1)

all_non_tiered_rates_blank = df[['age_0', 'age_0_tobacco', 'age_1', 'age_1_tobacco', ..... 'age_64', 'age_64_tobacco']]
all_non_tiered_rates_blank['count_notnull']=all_non_tiered_rates_blank.count(axis=1)
all_non_tiered_rates_blank['value_all_non_tiered'] = np.where(all_non_tiered_rates_blank['count_notnull']==(len(all_non_tiered_rates_blank.columns)-1),(len(all_non_tiered_rates_blank.columns)-1),np.NaN)

all_tiered_rates_blank = df[['single', 'single_and_spouse', 'single_and_children', 'family', 'child_only']]
all_tiered_rates_blank['count_notnull']=all_tiered_rates_blank.count(axis=1)
all_tiered_rates_blank['value_all_tiered'] = np.where(all_tiered_rates_blank['count_notnull']==(len(all_tiered_rates_blank.columns)-1),(len(all_tiered_rates_blank.columns)-1),np.NaN)

fixed_price_check = df['fixed_price']
prices_indexed_by_quantity_of_children_check = df['prices_indexed_by_quantity_of_children']

#Concatenate all the 4 DataFrames into one DataFrame df_final
df_final = pd.concat([all_non_tiered_rates_blank[['value_all_non_tiered']], all_tiered_rates_blank[['value_all_tiered']], fixed_price_check, prices_indexed_by_quantity_of_children_check], axis=1)

#Will give a count of non-NULLs.
df_final['count_notnull']=df_final.count(axis=1)

#df_bool is your boolean dataframe. It will be True if only 1 of the 4 DataFrames has a value, otherwise False
df_bool = df_final['count_notnull'].map(lambda x:x==1)