我想对以下回归模型应用k折交叉验证(线性回归、多项式回归、支持向量回归、决策树回归、随机森林回归):
除了多项式回归之外,我可以对所有模型应用k折交叉验证;多项式回归会报错:'PolynomialFeatures' object has no attribute 'predict'。
如何解决此问题?另外,我这样做是否正确?实际上我的主要目的是比较哪种模型表现更好,有没有更好的方法来完成这项工作?
# Compare Algorithms
import pandas
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
# load dataset
names = ['YearsExperience', 'Salary']
# header=0 tells pandas to consume the first line of the file as the header
# and replace it with `names`. Without it the first line is parsed as a data
# row, every column becomes strings, and row 0 has to be sliced off by hand
# (which is what the original `array[1:, ...]` did -- presumably because the
# CSV starts with a header line).
dataframe = pandas.read_csv('Salary_Data.csv', names=names, header=0)
array = dataframe.values
# scikit-learn expects 2-D inputs, so keep both as single-column matrices;
# callers flatten Y with .ravel() where a 1-D target is required.
X = array[:, 0].reshape(-1, 1)
Y = array[:, 1].reshape(-1, 1)
# prepare configuration for cross validation test harness
# Seed passed as random_state to KFold in the evaluation loop.
seed = 7
# prepare models
# Each entry is a (short label, estimator) pair; the label is used for the
# printed summary and the boxplot tick labels.
models = []
models.append(('LR', LinearRegression()))
# NOTE(review): PolynomialFeatures is a feature transformer, not a regressor,
# so it has no predict() method. Passing it to cross_val_score as a model is
# what triggers the reported error
# "'PolynomialFeatures' object has no attribute 'predict'".
# It needs to be combined with a regressor such as LinearRegression.
models.append(('PR', PolynomialFeatures(degree = 4)))
models.append(('SVR', SVR(kernel = 'rbf')))
models.append(('DTR', DecisionTreeRegressor()))
models.append(('RFR', RandomForestRegressor(n_estimators = 10)))
# evaluate each model in turn
results = []
names = []
# Negated mean absolute error: scores are <= 0 and closer to 0 is better.
scoring = 'neg_mean_absolute_error'
# random_state only has an effect when shuffle=True; without it the seed is
# ignored and recent scikit-learn releases raise a ValueError. The splitter
# is built once so every model is scored on the same folds.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    # Y is stored as a column vector; estimators expect a 1-D target.
    cv_results = model_selection.cross_val_score(model, X, Y.ravel(), cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report mean and standard deviation of the per-fold scores.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
# boxplot algorithm comparison
# One box per model, built from that model's per-fold scores.
fig = plt.figure()
ax = fig.add_subplot(111)
fig.suptitle('Algorithm Comparison')
# `ax` is the figure's only (and current) axes, so drawing on it directly
# is the same as going through plt.boxplot.
ax.boxplot(results)
ax.set_xticklabels(names)
plt.show()
答案 0 :(得分:1)
在sklearn中,您可以通过以下两步获得多项式回归:
1. 使用sklearn.preprocessing.PolynomialFeatures在数据集上生成多项式特征和交互特征;
2. 使用sklearn.linear_model.LinearRegression在转换后的数据集上运行普通最小二乘线性回归。
玩具示例:
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
# Expand the inputs into polynomial terms up to degree 3. The transformer is
# fitted on the training data only.
poly = PolynomialFeatures(degree=3)
X_train = poly.fit_transform(X_train)
# Reuse the already-fitted transformer for the test set; held-out data should
# be transformed, never re-fitted.
X_test = poly.transform(X_test)
# Create linear regression object and fit ordinary least squares on the
# expanded features.
model = linear_model.LinearRegression()
model.fit(X_train, y_train)
# score() reports R^2, here measured on the training data.
print(model.score(X_train, y_train))
答案 1 :(得分:0)
如果有人要参考,这里是代码的更改部分:
# prepare models
# (short label, estimator) pairs. 'PR' is a plain LinearRegression because the
# polynomial expansion is applied to the inputs in the evaluation loop.
models = [
    ('LR', LinearRegression()),
    ('PR', LinearRegression()),
    ('SVR', SVR(kernel = 'rbf')),
    ('DTR', DecisionTreeRegressor()),
    ('RFR', RandomForestRegressor(n_estimators = 10)),
]
# evaluate each model in turn
results = []
names = []
# Negated mean absolute error: scores are <= 0 and closer to 0 is better.
scoring = 'neg_mean_absolute_error'
# random_state only has an effect when shuffle=True; without it the seed is
# ignored and recent scikit-learn releases raise a ValueError. The splitter
# is built once so every model is scored on the same folds.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    if name == 'PR':
        # Polynomial regression = linear regression on polynomial features,
        # so this entry is scored on the degree-4 expansion of X.
        poly_reg = PolynomialFeatures(degree = 4)
        X_poly = poly_reg.fit_transform(X)
        features = X_poly
    else:
        features = X
    # Y is stored as a column vector; estimators expect a 1-D target.
    cv_results = model_selection.cross_val_score(model, features, Y.ravel(), cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Mean and standard deviation of the per-fold scores.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())