import numpy as np
from sklearn.feature_selection import chi2
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
# Experiment: fit one logistic regression per game on random features, then
# stack the per-game predicted probabilities into a second ("ensemble")
# logistic regression and report its accuracy and AUC.
#
# NOTE: every model below is scored on the same rows it was fitted on, so all
# reported accuracies and AUCs are in-sample figures, not held-out estimates.

# Random features: 20 games x 100 players x 3 scores per player per game.
x = np.random.rand(20, 100, 3)

# Labels: players 0-10 (11 players) are class 0, the remaining 89 are class 1.
y_list = [0 if i < 11 else 1 for i in range(100)]

# One entry per retained game: the predicted class-1 probability of each player.
x_all = []
for idx, game in enumerate(x):  # `game` is the (100, 3) score matrix of one game
    clf = LogisticRegression(solver='lbfgs')
    clf.fit(game, y_list)
    score = clf.score(game, y_list)  # in-sample accuracy
    # Compute the class-1 probabilities once and reuse them for both the AUC
    # and the stacked feature matrix.
    proba = clf.predict_proba(game)[:, 1]
    auc = roc_auc_score(y_list, proba)  # in-sample AUC of this game's model
    print('Game : {}, Score : {}, AUC : {}, People : {}'.format(idx, score, auc, len(game)))
    # AUC filter; with a threshold of 0 a game is dropped only if its AUC is
    # exactly 0, so in practice every game is kept.
    if auc > 0:
        x_all.append(proba)

# Transpose so that rows are players and columns are the per-game predicted
# probabilities; this is the feature matrix of the second-stage model.
x_data = np.array(x_all).T

clf = LogisticRegression(solver='lbfgs')
clf.fit(x_data, y_list)
final_score = clf.score(x_data, y_list)  # in-sample accuracy of the stacked model
final_auc = roc_auc_score(y_list, clf.predict_proba(x_data)[:, 1])  # in-sample ensemble AUC
print('Ensemble Result up to {}, Score : {}, AUC : {}, People : {}'.format(len(x), final_score, final_auc, len(y_list)))
共有20场比赛、100名球员,每名球员在每场比赛中有3个得分。
由于X是随机生成的,因此用逻辑回归拟合后,每场比赛的AUC都很低(约0.5)。
但是,当我把每名球员在每场比赛中的预测概率作为新的X再做一次逻辑回归时(我想这属于集成方法),
我得到的AUC高于0.9,这是非常高的。
我不明白,因为所有变量都是随机生成的,没有损失、权重更新等。(所有变量仅由逻辑回归模型看到。)下面是上面代码的输出。
Game : 0, Score : 0.89, AUC : 0.5740551583248212, People : 100
Game : 1, Score : 0.89, AUC : 0.6833503575076609, People : 100
Game : 2, Score : 0.89, AUC : 0.6006128702757916, People : 100
Game : 3, Score : 0.89, AUC : 0.6659856996935648, People : 100
Game : 4, Score : 0.89, AUC : 0.7191011235955055, People : 100
Game : 5, Score : 0.89, AUC : 0.6455566905005108, People : 100
Game : 6, Score : 0.89, AUC : 0.5628192032686414, People : 100
Game : 7, Score : 0.89, AUC : 0.6905005107252299, People : 100
Game : 8, Score : 0.89, AUC : 0.7048008171603678, People : 100
Game : 9, Score : 0.89, AUC : 0.6220633299284984, People : 100
Game : 10, Score : 0.89, AUC : 0.6567926455566905, People : 100
Game : 11, Score : 0.89, AUC : 0.7517875383043923, People : 100
Game : 12, Score : 0.89, AUC : 0.5852911133810009, People : 100
Game : 13, Score : 0.89, AUC : 0.7007150153217568, People : 100
Game : 14, Score : 0.89, AUC : 0.5526046986721145, People : 100
Game : 15, Score : 0.89, AUC : 0.5801838610827375, People : 100
Game : 16, Score : 0.89, AUC : 0.6312563840653729, People : 100
Game : 17, Score : 0.89, AUC : 0.6751787538304392, People : 100
Game : 18, Score : 0.89, AUC : 0.7150153217568948, People : 100
Game : 19, Score : 0.89, AUC : 0.5873340143003065, People : 100
Ensemble Result up to 20, Score : 0.89, AUC : 0.9448416751787538, People : 100