我是python的新手,我的代码稍微更改了这个code。 目前它正在抛出以下错误(我不明白为什么):
File "LassoNested.py", line 51, in <module>
lasso_regressor.fit(X_test_inner, y_test_inner), line 1071, in fit
alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))
IndexError: too many indices for array
这里是代码 - 它使Lasso适合我的数据集,在外循环中执行模型评估,在内循环中进行模型选择(在那里找到套索的最佳alpha值):
import numpy as np
import operator
import csv
from sklearn import linear_model
from sklearn import cross_validation
if __name__ == "__main__":
# Load the training data
X = np.loadtxt("train.csv",delimiter=',', usecols=range(1,15))
y = np.loadtxt("train.csv", delimiter=',', usecols=range(15,16))
print "Number of training samples: " + str(X.shape[0])
outer_scores = []
# outer cross-validation
outer = cross_validation.KFold(len(y), n_folds=3, shuffle=True)
for fold, (train_index_outer, test_index_outer) in enumerate(outer):
X_train_outer, X_test_outer = X[train_index_outer], X[test_index_outer]
y_train_outer, y_test_outer = y[train_index_outer], y[test_index_outer]
inner_mean_scores = []
# define explored parameter space.
# procedure below should be equal to GridSearchCV
#tuned_parameter = np.logscale(-10,2,15)
alphas = [0.1,2,3]
for param in alphas:
inner_scores = []
# inner cross-validation
inner = cross_validation.KFold(len(X_train_outer), n_folds=3, shuffle=True)
for train_index_inner, test_index_inner in inner:
# split the training data of outer CV
X_train_inner, X_test_inner = X_train_outer[train_index_inner], X_train_outer[test_index_inner]
y_train_inner, y_test_inner = y_train_outer[train_index_inner], y_train_outer[test_index_inner]
lasso_regressor = linear_model.Lasso(alphas=param, cv = 10, normalize=True, fit_intercept=True)
lasso_regressor.fit(X_test_inner, y_test_inner)
inner_scores.append(lasso_regressor.score(X_test_inner, y_test_inner))
# calculate mean score for inner folds
inner_mean_scores.append(np.mean(inner_scores))
# get maximum score index
index, value = max(enumerate(inner_mean_scores), key=operator.itemgetter(1))
print 'Best parameter of %i fold: %i' % (fold + 1, tuned_parameter[index])
# fit the selected model to the training set of outer CV
# for prediction error estimation
lasso_regressor2 = linear_model.Lasso(alphas=param, cv = 10, normalize=True, fit_intercept=True)
lasso_regressor2.fit(X_train_outer, y_train_outer)
outer_scores.append(lasso_regressor2.score(X_test_outer, y_test_outer))
# show the prediction error estimate produced by nested CV
print 'Unbiased prediction error: %.4f' % (np.mean(outer_scores))
# finally, fit the selected model to the whole dataset
lasso_regressor3 = linear_model.Lasso(alphas=param, cv=10, normalize=True,fit_intercept=True)
lasso_regressor3.fit(X, y)