我想构建一个函数,以对列值的所有可能组合逐步过滤数据框。我已经构建了一个可以通过前三个功能及其对应的组合值进行访问的函数,但是理想情况下,不管数据帧的功能数量如何,递归函数的执行效率都会更高。
SELECT column
FROM table
WHERE day IN date ('2019-06-27','2019-07-08', '2019-07-10')
现在,我尝试构建的可恢复函数在递归调用正确的“模式”时失败了
def range_(feature):
#A utility function
#With extreme values
interval = (min(feature),max(feature))
delta = max(feature)-min(feature)
return (interval,delta)
GP_OP = pd.read_csv('GeneralPreOp_OnePager.csv')
data_GP_OP = { 'GeAr0' : (0,1),
'GeHo2' : (-1,0,1,2,3), #-1 : Patient doesn't take painkillers
'GeAr3' : (0,1),
'GeHo5' : (-1,0,1,2,3), #-1: Patient doesn't take anti-inflammatories
'GeHo6' : (0,1,2,3,4),
'GeAr9' : (0,1),
'GeDo10': (0,1),
'GeDo12': (0,1,2),
'GeDo13': (0,1,2,3)
}
#The actual code
#GP_OP[GP_OP.GeAr0 == 1][GP_OP.GeAr3 == 0]
for i,feature in enumerate(vv):
df_feature = getattr(vv,feature) #take feature
value_df_feature = data[feature] #take values of feature
total_features = len(vv)
for j,value in enumerate(value_df_feature):
interval = range_(GP_OP[df_feature == value].Oxford_Score_6w)
sample = len(GP_OP[df_feature == value])
perc = round((sample/475)*100,2)
GP_OP_oxford3[(feature,value )] = (interval,sample,perc)
#------------
for t,f in enumerate(vv): #for all the other features
if i != t: #don't test the feature with itself
test_feature = getattr(vv,f) #test feature
value_test_feature = data[f] #value of test feature
for test,test_value in enumerate(value_test_feature):
if len(GP_OP[df_feature == value][test_feature == test_value].Oxford_Score_6w) != 0:
interval2 = range_(GP_OP[df_feature == value][test_feature == test_value].Oxford_Score_6w)
sample2 = len(GP_OP[df_feature == value][test_feature == test_value])
perc2 = round((sample2/475)*100,2)
GP_OP_oxford3[(feature,value ),(f,test_value)] = (interval2 ,sample2,perc2)
#---------------
for t1,f1 in enumerate(vv): #for all the other features
if i != t1 and t!=t1: #don't test the feature with itself
test_feature1 = getattr(vv,f1) #test feature
value_test_feature1 = data[f1] #value of test feature
for test1,test_value1 in enumerate(value_test_feature1):
if len(GP_OP[df_feature == value][test_feature == test_value][test_feature1 == test_value1].Oxford_Score_6w) != 0:
interval3 = range_(GP_OP[df_feature == value][test_feature == test_value][test_feature1 == test_value1].Oxford_Score_6w)
sample3 = len(GP_OP[df_feature == value][test_feature == test_value][test_feature1 == test_value1])
perc3 = round((sample3/475)*100,2)
GP_OP_oxford3[(feature,value ),(f,test_value),(f1,test_value1)] = (interval3 ,sample3,perc3)