我使用GridSearchCV进行线性回归的交叉验证(不是分类器也不是逻辑回归)。
我也使用StandardScaler来标准化X
我的数据框有17个特征(X)和5个目标(y)(观察)。大约1150行
我一直得到 `ValueError: continuous is not supported` 这条错误消息,而且已经想不出别的办法了。
这里有一些代码(假设所有导入都正确完成):
# Driver script: load the soil data, build the train/test splits, then run the
# parameter search for every (score, model) combination on the Ca target.
soilM = pd.read_csv('C:/training.csv', index_col=0)
soilM = getDummiedSoilDepth(soilM)  # turn the text values into 0/1 columns
# Pass the axis by keyword; the positional form drop(label, 1) is rejected by
# newer pandas releases.
soilM = soilM.drop('Depth', axis=1)
soil = soilM.iloc[:, -22:]  # keep only the last 22 columns
(X_train, X_test,
 Ca_train, Ca_test,
 P_train, P_test,
 pH_train, pH_test,
 SOC_train, SOC_test,
 Sand_train, Sand_test) = splitTrainTestAdv(soil)
# NOTE(review): 'precision' and 'recall' are classification metrics, while the
# targets here are continuous. This is the likely trigger of the
# "ValueError: continuous is not supported" reported for this code.
scores = ['precision', 'recall']
for score in scores:
    for model in MODELS.keys():
        # %-formatting prints "model score" identically on Python 2 and 3.
        print("%s %s" % (model, score))
        performParameterSelection(model, score, X_test, Ca_test, X_train, Ca_train)
def performParameterSelection(model_name, criteria, X_test, y_test, X_train, y_train):
    """Grid-search one entry of MODELS and print the results.

    Looks up the (estimator, parameter grid) pair registered under
    ``model_name``, runs a 5-fold GridSearchCV on the training data scored
    with ``'<criteria>_weighted'``, prints the best parameters and the
    per-candidate scores, then prints a classification report for the
    predictions on the test data.
    """
    estimator, grid = MODELS[model_name]
    # NOTE(review): the '*_weighted' scorers and classification_report are
    # classification tools; with a continuous y they raise
    # "ValueError: continuous is not supported".
    scoring_name = '%s_weighted' % criteria
    search = GridSearchCV(estimator, grid, n_jobs=1, cv=5, verbose=1,
                          scoring=scoring_name)
    search.fit(X_train, y_train)
    print(search.best_params_)
    for candidate, mean_score, fold_scores in search.grid_scores_:
        # The spread is reported as two standard deviations of the fold scores.
        spread = fold_scores.std() * 2
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, spread, candidate))
    y_true = y_test
    y_pred = search.predict(X_test)
    print(classification_report(y_true, y_pred))
# Registry of candidate regressors: name -> (estimator instance, parameter grid).
# performParameterSelection unpacks each pair and hands it to GridSearchCV.
MODELS = {
    'lasso': (linear_model.Lasso(), {'alpha': [0.95]}),
    'ridge': (linear_model.Ridge(), {'alpha': [0.01]}),
    'elasticnet': (
        linear_model.ElasticNet(),
        {'alpha': [0.6], 'l1_ratio': [0.4]},
    ),
    'svr': (
        svm.SVR(),
        {'C': [5.0], 'epsilon': [0.1], 'kernel': ['linear']},
    ),
}
def performLasso(X_train, y_train, X_test, parameter):
    """Fit a Lasso model on the training data and predict for ``X_test``.

    The first element of ``parameter`` is used as the Lasso ``alpha``; the
    model is created with ``normalize=True``.
    """
    lasso = linear_model.Lasso(alpha=parameter[0], normalize=True)
    lasso.fit(X_train, y_train)
    return lasso.predict(X_test)
def splitTrainTestAdv(df):
    """Scale the features and split features and all five targets 80/20.

    The last five columns of ``df`` are the targets (Ca, P, pH, SOC, Sand);
    every other column is a feature.

    Returns a 12-tuple:
    (X_train, X_test, Ca_train, Ca_test, P_train, P_test, pH_train, pH_test,
    SOC_train, SOC_test, Sand_train, Sand_test).
    """
    targets = df.iloc[:, -5:]        # last 5 columns
    features = df.iloc[:, :-5].copy()  # everything except the last 5 columns
    Ca = targets['Ca'].copy()
    P = targets['P'].copy()
    pH = targets['pH'].copy()
    SOC = targets['SOC'].copy()
    Sand = targets['Sand'].copy()
    # Scaling and sampling.
    # NOTE(review): the scaler is fitted on all rows before the split, so the
    # test rows contribute to the scaling statistics.
    X = StandardScaler(copy=False).fit_transform(features)
    # Split every target in the same call so all of them share one row
    # partition. Previously only Ca was split, and returning the other eight
    # names raised NameError.
    return tuple(train_test_split(X, Ca, P, pH, SOC, Sand,
                                  test_size=0.2, random_state=0))
这些是代码的主要部分
这是错误输出的主要部分:
ValueError Traceback (most recent call last)
<ipython-input-90-1315d47e2551> in <module>()
20 print '####################'
21 print featuresV[1]
---> 22 performParameterSelection(model, score, X_test, Ca_test, X_train, Ca_train)
23 print featuresV[2]
24 performParameterSelection(model, score, X_test, P_test, X_train, P_train)
<ipython-input-41-7075e1a49412> in performParameterSelection(model_name, criteria, X_test, y_test, X_train, y_train)
12 # cv=5 - constant; verbose - keep writing
13
---> 14 gs.fit(X_train, y_train) # Will get grid scores with outputs from ALL models described above
15
16 #pprint(sorted(gs.grid_scores_, key=lambda x: -x.mean_validation_score))
C:\Users\Tony\Anaconda\lib\site-packages\sklearn\grid_search.pyc in fit(self, X, y)
730
731 """
--> 732 return self._fit(X, y, ParameterGrid(self.param_grid))
90 if (y_type not in ["binary", "multiclass", "multilabel-indicator",
91 "multilabel-sequences"]):
---> 92 raise ValueError("{0} is not supported".format(y_type))
93
94 if y_type in ["binary", "multiclass"]:
ValueError: continuous is not supported
使用soil.head(15)后的一些数据。它没有显示所有列,但它应该以相同的方式使用8个特征而不是17个。至于目标:这些是最后5列,但这里的代码只计算了一个(Ca)
BSAN BSAS BSAV CTI ELEV EVI LSTD LSTN REF1 REF2 ... RELI Subsoil Topsoil TMAP TMFI Ca P pH SOC Sand
PIDN
92RkYor6 -0.405797 -0.563636 -0.806271 -0.228241 -0.691982 1.653790 -0.605889 0.627488 -0.856727 0.056586 ... -0.062181 0 1 0.896228 1.651807 -0.394962 0.031291 0.488676 -0.389042 0.630347
nPv9P04t -0.688406 -0.709091 -0.739082 -0.189180 1.185523 0.395773 -0.381748 -0.338928 -0.774545 -0.818182 ... 2.995923 1 0 1.539208 1.618022 -0.460044 -0.366432 -0.549490 0.204798 -1.162260
oCASbXEx -0.623188 -0.654545 -0.727884 -0.155835 0.711136 0.517493 -0.035002 -0.092554 -0.725818 -0.651206 ... -0.300034 1 0 0.286952 0.657765 0.259613 -0.407934 0.591558 -0.529688 -0.793082
xq94dGBz -0.746377 -0.781818 -0.862262 -0.340427 0.791314 0.672741 -0.665032 -0.128613 -0.853091 -0.741187 ... -0.418960 0 1 0.276740 0.678724 -0.467854 -0.245386 -0.577548 -0.428111 -0.130845
GYSYA8Yf -0.862319 -0.836364 -0.783875 -0.020427 4.715590 0.473032 -1.321194 -2.560069 -0.791273 -0.827458 ... 2.299354 1 0 0.583042 1.825040 1.442361 -0.328389 0.797320 -0.443738 -0.892037
G4e9Ahvi -0.710145 -0.736364 -0.727884 -0.175122 -1.003786 0.744898 -0.678329 0.851702 -0.661818 -0.474954 ... -0.300034 1 0 1.544703 1.641861 -0.355335 -0.079380 -0.287610 -0.256209 0.287810
SHU443XO -0.579710 -0.736364 -0.963046 -0.536744 -0.179733 1.793003 -0.914052 0.291898 -0.966545 -0.086271 ... 0.260618 0 1 1.840689 2.223996 -0.499961 0.155796 -0.886192 -0.107749 0.942435
oAeygDKu -0.152174 -0.154545 -0.134378 1.252267 -0.796659 -0.155977 1.309391 0.642680 -0.205818 -0.341373 ... -0.537887 1 0 -0.320335 0.429981 -0.441821 -0.352598 0.339031 -0.826609 1.650344
agBvYkUI -0.724638 -0.790909 -0.839866 0.114245 1.363697 0.726676 -1.687885 0.060034 -0.706909 -0.523191 ... 1.127081 1 0 1.254782 0.972442 -0.505456 -0.345681 -1.774712 0.071966 -1.207931
8ujcZd8d -0.427536 -0.600000 -0.806271 -0.667808 -1.208686 2.008018 -1.276453 1.203854 -0.698182 0.224490 ... 0.107713 0 1 0.288463 0.013744 -0.362277 -0.338764 0.039740 -0.232768 0.451467
hqO5LhmQ -0.644928 -0.690909 -0.772676 -0.195877 1.138753 0.390671 0.145537 -0.544813 -0.722909 -0.729128 ... -0.537887 0 1 0.153926 0.422784 -0.460333 -0.300721 -0.063142 -0.607825 1.208852
QsfH8CWp -0.449275 -0.618182 -0.862262 -0.512923 -0.712027 1.537901 -0.665190 0.595265 -0.884364 -0.103896 ... -0.028203 1 0 0.896228 1.651807 -0.475953 -0.252303 -0.128612 -0.670335 0.786391
5hhEGbrX -0.260870 -0.290909 -0.335946 -0.175122 -0.749889 0.400146 0.299908 0.567983 -0.423273 -0.244898 ... -0.520897 1 0 0.249117 0.907095 -0.142446 -0.397558 0.423206 -0.412483 -0.678903
XlJWsmdz -0.768116 -0.800000 -0.873460 -0.737115 0.682183 1.013848 -1.013065 -0.376346 -0.837818 -0.544527 ... 1.619776 1 0 0.942437 1.482143 -0.358517 1.283256 -0.072494 -0.490620 -0.899649
FY3riRgw -0.818841 -0.863636 -0.873460 -0.739177 1.715590 1.434402 -1.669818 -0.090647 -0.874909 -0.388683 ... 3.182807 0 1 1.254782 0.972442 -0.333063 0.020916 -0.942309 1.314342 -0.690321
15行×22列
答案 0 :(得分:6)
您的错误 `continuous is not supported` 告诉我,您正在回归问题中尝试做一些属于分类领域的事情。
既然你的目标是回归,至少有一处引起了我的注意:
scores = ['precision', 'recall']
首先,两者都与回归无关(正如 @zero323 在对你的问题的评论中指出的那样):它们是分类的评价指标。请从这个 sklearn 文档页面的 "3.3.1.1. 常见情况:预定义值" 一节中,选用任何适合你的回归评分指标。
就代码的其余部分而言,我强烈建议你从头重写:Lasso 一段、Ridge 一段、ElasticNet 一段、SVM 一段(既然 Ridge 和 Lasso 都是 ElasticNet 的特例,为什么还要把它们与 ElasticNet 分开运行???)。这样总共不会超过 10-15 行代码。只有在确认这几段代码都能正常运行、能找到最佳超参数并计算出所需的回归指标之后,我才会尝试优化代码,把所有内容放进一个循环。
PS:
这些循环应该如何运行:
for score in scores:
for model in MODELS.keys():
在定义 MODELS 之前?