发生以下错误:
Singleton array array('data.mean', dtype='<U9') cannot be considered a valid collection.
我想用参数“mean”预测天气预报。参数“mean”是一个十进制小数值。
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
# Train and evaluate a random-forest regressor that predicts the 'mean'
# column of dissertation_data.csv from all remaining columns.

# Load the dataset and print summary statistics as a quick sanity check.
data = pd.read_csv('dissertation_data.csv')
print(data.describe())

# Target vector: the 'mean' column. Bracket indexing is required because
# attribute access (data.mean) resolves to the DataFrame.mean method, and
# np.array('data.mean') would merely wrap the literal string in a 0-d array,
# which scikit-learn rejects as a "Singleton array".
y = data['mean'].to_numpy()
# Feature matrix: every column except the target.
X = data.drop('mean', axis=1).to_numpy()

# Hold out 20% of the rows for the final evaluation. No `stratify` argument:
# stratification needs discrete class labels, and a continuous regression
# target has (almost) one distinct value per row, which makes it fail.
X_train, X_test, y_training, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=123,
)

# The pipeline standardizes the features and then fits the forest, so the
# scaler is re-fitted on the training part of every cross-validation fold.
pipeline = make_pipeline(
    preprocessing.StandardScaler(),
    RandomForestRegressor(n_estimators=100),
)

# Standalone scaled copies, for inspection only; the pipeline above does its
# own scaling and is fed the raw X_train / X_test. The scaler is fitted on the
# training split alone so no test-set statistics leak into it.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
print(X_train_scaled)
X_test_scaled = scaler.transform(X_test)

# Grid of forest hyperparameters; keys use the pipeline's auto-generated step
# name. max_features=None means "use all features", which is what 'auto'
# meant for regressors before that alias was removed from scikit-learn.
hyperparameters = {
    'randomforestregressor__max_features': [None, 'sqrt', 'log2'],
    'randomforestregressor__max_depth': [None, 5, 3, 1],
}

# Exhaustive search over the grid with 10-fold cross-validation.
clf = GridSearchCV(pipeline, hyperparameters, cv=10)
clf.fit(X_train, y_training)
print(clf.best_params_)
# Prints the `refit` setting of the search object.
print(clf.refit)

# Evaluate on the held-out test split.
y_pred = clf.predict(X_test)
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))