I am trying to train an ML model using AdaBoostRegressor as the ensemble learning algorithm, with XGBRegressor as the base estimator. To gain more confidence in the model, I plotted the learning curves, which show the training error (RMSE on the training set) and the validation error (RMSE on the validation set).
My question is: how do I determine what the maximum acceptable gap between these two curves is?
import xgboost as xgb
from sklearn.ensemble import AdaBoostRegressor

# XGBRegressor as the base estimator, boosted by AdaBoostRegressor
model_ada = xgb.XGBRegressor(random_state=42)
rns_srch_ada = AdaBoostRegressor(model_ada,
                                 n_estimators=15, random_state=22, learning_rate=0.01)
rns_srch_ada.fit(X_train, y_train)
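As a quick way to put a number on that gap, one option (not part of the original post, sketched here under the assumption that X and y are the full feature matrix and target) is to compute the train and hold-out RMSE of the fitted ensemble directly:

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Hypothetical check: refit on one split of X/y and compare train vs. validation RMSE
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
rns_srch_ada.fit(X_tr, y_tr)
rmse_train = np.sqrt(mean_squared_error(y_tr, rns_srch_ada.predict(X_tr)))
rmse_val = np.sqrt(mean_squared_error(y_val, rns_srch_ada.predict(X_val)))
# The relative gap between the two is one common way to gauge overfitting severity
print(f"train RMSE = {rmse_train:.3f}, val RMSE = {rmse_val:.3f}, "
      f"relative gap = {(rmse_val - rmse_train) / rmse_val:.1%}")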
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def plot_learning_curves(model, X, y):
    # Hold out 20% of the data for validation
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
    train_errors, val_errors = [], []
    # Refit the model on increasingly large subsets of the training set
    for m in range(1, len(X_train)):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        # mean_squared_error expects (y_true, y_pred)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    # Plot RMSE (square root of the stored MSE values) for both curves
    plt.plot(np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(np.sqrt(val_errors), "b-", linewidth=3, label="val")
    plt.legend()
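For completeness, a hypothetical call to the function above might look like this (X and y are assumed to be the full dataset; the axis labels are only suggestions):

plot_learning_curves(rns_srch_ada, X, y)
plt.xlabel("training set size")  # curves are indexed by the subset size m
plt.ylabel("RMSE")
plt.show()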