Question

从ageron的手动ML与scipy和tensorflow，Adaboost功能方程式详细描述，除了在训练时如何使用实例权重。下面是我使用skelearn的DecisionTree分类器的代码，我认为sample_weight在fit（）时可能是Weights，但是在更改n_estimators时精度是不稳定的。代码有什么问题？

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt

X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y)

# implement AdaBoost classification ,TBD: score weired with n_estimators and max_depth
eta = 0.5 # learning_rate
n_estimators = 10 # simple start

# initial
clfs = [DecisionTreeClassifier(max_depth=1),] * n_estimators # predictors
W = np.ones((X_train.shape[0])) / X_train.shape[0]    # instance weight
R = np.zeros(n_estimators)  # weighted error rate of predictors
Alpha = np.zeros(n_estimators)  # predictor weight

# build_trees
for j in range(n_estimators):
    clf = clfs[j]
    plt.plot(W)
    clf.fit(X_train, y_train, sample_weight=W)
    y_pred_train = clf.predict(X_train)
    # Equation7-1,
    R[j] = W[y_pred_train != y_train].sum() / (W.sum())
    # 7-2,
    Alpha[j] = eta * np.log((1 - R[j]) / R[j])
    # 7-3, update Weight
    W[y_pred_train != y_train] *= np.exp(Alpha[j])
    # normalize
    W /= W.sum()

# predict
K = np.zeros((y_test.shape[0], n_estimators), dtype=np.int32)
for j in range(n_estimators):
    K[:,j] = clfs[j].predict(X_test)
# find max k with sum(alpha)
V = np.zeros((y_test.shape[0], 2))
for i in range(y_test.shape[0]):
    for j in range(n_estimators):
        if K[i,j] == y_test[i]:
            V[i,y_test[i]] += Alpha[j]

y_pred = np.argmax(V, axis=1)

print(accuracy_score(y_test, y_pred))

plt.legend(range(n_estimators))
plt.show()

如何在AdaBoost实现中将实例权重用于自定义丢失函数

0 个答案: