我想在交叉验证中使用自定义评分器，并且该评分器需要用到测试样本的索引（索引取自数据中的某一列）。如何在评分器中使用这些索引？我尝试了下面的写法，但不起作用：
import pandas as pd
from sklearn.metrics import make_scorer
from sklearn.model_selection import GroupShuffleSplit, cross_val_score
def custom_score(y_true, y_pred, test_index=None):
    """Score predictions grouped by the index label of each test sample.

    Scorers built with ``make_scorer`` invoke the metric as
    ``score_func(y_true, y_pred, **kwargs)``, so a required third positional
    parameter is never supplied and the call fails with ``TypeError``.
    ``test_index`` is therefore optional: when it is omitted, the labels are
    taken from ``y_true.index``, which is available when the targets reach
    the scorer as a pandas object.

    Args:
        y_true: True targets of the test fold.
        y_pred: Predictions for the same samples, in the same order.
        test_index: Optional label per sample used for grouping. Defaults to
            ``y_true.index``.

    Returns:
        The aggregated per-group value divided by a normaliser. Both the
        per-group function and the normaliser are still ``...`` placeholders
        in this snippet and must be filled in before the metric is usable.

    Raises:
        TypeError: If ``test_index`` is omitted and ``y_true`` is not a
            pandas Series or DataFrame, so no index can be recovered.
    """
    if test_index is None:
        if not isinstance(y_true, (pd.Series, pd.DataFrame)):
            raise TypeError(
                "test_index was not given and y_true has no pandas index; "
                "pass y as a pandas Series or supply test_index explicitly"
            )
        test_index = y_true.index

    tmp = pd.DataFrame({'predict' : y_pred, 'label' : y_true, 'test_idx': test_index})
    # groupby(...).apply hands the lambda one sub-frame per distinct label,
    # not a single row, despite the parameter name.
    some_value = tmp.groupby('test_idx').apply(lambda row: ...).sum()
    return some_value / ...
# Group-aware splitter: 3 shuffled splits, 30% of the groups held out each time.
cv = GroupShuffleSplit(3, test_size=0.3)
my_score = make_scorer(custom_score)

# Move the grouping column into the index; X and y both inherit that index.
data = data.set_index(some_col)
X = data.drop(target, axis=1)
y = data[target]

# The pandas attribute is ``index``; ``indexs`` does not exist and raises
# AttributeError before cross-validation even starts.
scores = cross_val_score(
    some_model, X, y, cv=cv, groups=X.index, scoring=my_score
)