我想在我的数据集中的Random Forest Regressor中实施交叉验证。我想知道我的代码是否正确?这是交叉验证的方法吗? 这是我的示例数据:
Wavelength Phase_velocity Shear_wave_velocity
1.50 202.69 240.73
1.68 192.72 240.73
1.79 205.54 240.73
........
这是我的代码:
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import KFold,train_test_split,cross_val_score
df = pd.read_csv("5.5-6.csv")
df.head()
X = df[['wavelength', 'phase velocity']]
y = df['shear wave velocity']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print (len(X_train),len(X_test),len(y_train),len(y_test))
from sklearn.model_selection import KFold
kfold = KFold(n_splits=10, shuffle=True)
rf = RandomForestRegressor(n_estimators=30000)
rf.fit(X_train, y_train)
results = cross_val_score(rf, X_train, y_train, cv=kfold) #Cross validation on training set
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
print (rf.predict(X_test)) #array_output
print (y_test)
print (rf.score(X_test, y_test))
y_pred = rf.predict(X_test)
from sklearn.metrics import mean_absolute_error
print (mean_absolute_error(y_test,y_pred))
from sklearn.metrics import mean_squared_error
from math import sqrt
rmse = sqrt(mean_squared_error(y_test,y_pred))
print(rmse)