从ageron的手动ML与scipy和tensorflow,Adaboost功能方程式详细描述,除了在训练时如何使用实例权重。 下面是我使用skelearn的DecisionTree分类器的代码,我认为sample_weight在fit()时可能是Weights,但是在更改n_estimators时精度是不稳定的。代码有什么问题?
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y)
# implement AdaBoost classification ,TBD: score weired with n_estimators and max_depth
eta = 0.5 # learning_rate
n_estimators = 10 # simple start
# initial
clfs = [DecisionTreeClassifier(max_depth=1),] * n_estimators # predictors
W = np.ones((X_train.shape[0])) / X_train.shape[0] # instance weight
R = np.zeros(n_estimators) # weighted error rate of predictors
Alpha = np.zeros(n_estimators) # predictor weight
# build_trees
for j in range(n_estimators):
clf = clfs[j]
plt.plot(W)
clf.fit(X_train, y_train, sample_weight=W)
y_pred_train = clf.predict(X_train)
# Equation7-1,
R[j] = W[y_pred_train != y_train].sum() / (W.sum())
# 7-2,
Alpha[j] = eta * np.log((1 - R[j]) / R[j])
# 7-3, update Weight
W[y_pred_train != y_train] *= np.exp(Alpha[j])
# normalize
W /= W.sum()
# predict
K = np.zeros((y_test.shape[0], n_estimators), dtype=np.int32)
for j in range(n_estimators):
K[:,j] = clfs[j].predict(X_test)
# find max k with sum(alpha)
V = np.zeros((y_test.shape[0], 2))
for i in range(y_test.shape[0]):
for j in range(n_estimators):
if K[i,j] == y_test[i]:
V[i,y_test[i]] += Alpha[j]
y_pred = np.argmax(V, axis=1)
print(accuracy_score(y_test, y_pred))
plt.legend(range(n_estimators))
plt.show()