from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import RandomizedSearchCV
train,test,train_label,test_label=train_test_split(feature_data,target_data,test_size=0.20,random_state=3)
sc=StandardScaler()
train_std=sc.fit_transform(train)
test_Std=sc.transform(test)
pipe=SGDRegressor()
parameters = {'sgd__loss':['squared_loss','huber'],
'sgd__n_iter':np.ceil(106/len(train_label)),
'sgd__alpha':10.0**np.arange(1,7),
}
g_search=RandomizedSearchCV(pipe,param_distributions=parameters,random_state=2)
g_fit=g_search.fit(train_std,train_label)
培训数据:
train_std
Out[46]:
array([[ 1.99470848, 2.39114909, 0.96705 , ..., 0.23698853,
0.89215521, -0.74111955],
[-0.50742363, -0.54567689, -0.29516734, ..., 0.00491999,
-0.73959331, 0.42680023],
[-0.46965669, -0.10483307, 0.90566027, ..., -0.34272278,
0.69705485, 0.56151837],
...,
[-0.05849323, 0.11803686, 0.45737245, ..., 0.24026818,
0.75026404, -0.3829142 ],
[ 0.83045625, 0.66257208, -0.01582026, ..., 0.32870492,
-0.27844698, -0.83648146],
[-0.0886727 , 0.46158079, 1.36521081, ..., -0.10050365,
-0.68638412, -0.04006983]])
培训标签
train_label
Out[47]:
24429 1.863
32179 18.296
42715 1.417
6486 6.562
39407 18.669
...
42602 6.002
6557 2.921
30305 11.835
4718 1.212
错误:“ numpy.float64”类型的对象没有len()
g_fit在拟合训练数据时导致错误
我正在尝试通过RandomizedSearchCV使用SGDRegressor,但这得到了
调整训练数据时出错
答案 0 :(得分:2)
我猜下面的行会引起上述错误:
parameters = {...,
'sgd__n_iter':np.ceil(10**6/len(train_label)), # <--- should be a list-like object, not a scalar!
...,
}
尝试以下操作:
parameters = {'sgd__loss':['squared_loss','huber'],
'sgd__n_iter': [np.ceil(10**6/len(train_label))],
# NOTE: ^ ^
'sgd_alpha':10.0**np.arange(1,7),
}
答案 1 :(得分:2)
因此,错误是由与键'sgd__n_iter'
:np.ceil(10**6/len(train_label))
相对应的值引起的。
因此,您有两种方法可以解决此问题:
[np.ceil(10**6/len(train_label))]
SGDRegressor
的构造函数中,不要将其放入param_distributions
字典中。我还注意到您的代码中存在一些不一致之处,因此请在下面查看代码的最小版本和更简洁版本
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression
n_samples = 1000
n_features = 50
X, y = make_regression(n_samples=n_samples, n_features=n_features)
X_train, X_test, y_train, y_test = train_test_split(X, y)
pipe = Pipeline([('scaler', StandardScaler()),
('sgd', SGDRegressor())])
parameters = {'sgd__loss': ['squared_loss','huber'],
'sgd__n_iter': [np.ceil(10**6 / n_samples)],
'sgd__alpha': 10.0**np.arange(1,7)}
g_search = RandomizedSearchCV(pipe, param_distributions=parameters, random_state=2)
g_search.fit(X_train, y_train)