使用 LASSO 进行交叉验证

时间:2021-03-31 05:24:50

标签: r machine-learning lasso-regression

我想用 LASSO 在下面的数据集上训练模型,并对每种方法都使用重复 5 次的 10 折交叉验证(10-fold CV)。

这是我当前的代码,但我收到一条错误消息,我认为我做错了。请告诉我应该如何更改它!

错误信息:

Error in if (nulldev == 0) stop("y is constant; gaussian glmnet fails at standardization step") : missing value where TRUE/FALSE needed

代码:

# Cross-validated LASSO for the two-class outcome `Class`.
#
# Expects the data frames `train.dat` and `test.dat` to exist already, each
# holding the predictors plus a `Class` column with the labels "No"/"Yes".
# Requires glmnet (cv.glmnet) and caret (trainControl) to be attached.
set.seed(150847)

dv <- "Class"

# Predictor matrices. model.matrix() prepends an intercept column, which is
# dropped because glmnet fits its own intercept. drop = FALSE keeps the result
# a matrix even if only one predictor remains.
model.mat <- model.matrix(Class ~ ., train.dat)
x.train <- model.mat[, -1, drop = FALSE]

model.mat <- model.matrix(Class ~ ., test.dat)
x.test <- model.mat[, -1, drop = FALSE]

# The response is categorical, so it is encoded as a factor and fitted with
# family = "binomial" below. Passing the character labels to the default
# gaussian family turns them into NA, which is what triggers
# "missing value where TRUE/FALSE needed" in the `nulldev == 0` check.
y.train <- factor(train.dat[[dv]])
# Reuse the training levels so both responses are coded identically.
y.test <- factor(test.dat[[dv]], levels = levels(y.train))

#-----------------------------------------

# Standardise both matrices with the TRAINING mean/sd so that no information
# from the test set leaks into the preprocessing.
train.means <- colMeans(x.train)
train.sd <- apply(x.train, 2, sd, na.rm = TRUE)

x.train <- scale(x.train, center = train.means, scale = train.sd)
x.test <- scale(x.test, center = train.means, scale = train.sd)

# NOTE: this control object is only consumed by caret::train(); cv.glmnet()
# does not read it, so the repeats are implemented explicitly below.
tr.Control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5)

folds.k <- 10
n.repeats <- 5

# Repeated k-fold CV: each call to cv.glmnet() draws a fresh random fold
# assignment, so running it n.repeats times gives n.repeats CV estimates.
# NOTE(review): assumes the full training set has at least folds.k rows and
# enough observations of each class per fold -- confirm on the real data.
cv.fits <- lapply(seq_len(n.repeats), function(i) {
  cv.glmnet(x.train, y.train,
            family = "binomial",   # two-class response, not gaussian
            alpha = 1,             # alpha = 1 is the LASSO penalty
            nfolds = folds.k,
            standardize = FALSE,   # predictors were standardised above
            intercept = TRUE)
})

# First repetition, kept under the original name for downstream code.
cv.lasso <- cv.fits[[1]]

# Selected penalty from every repetition, to judge how stable the choice is.
lambda.min.reps <- vapply(cv.fits, function(fit) fit$lambda.min, numeric(1))

训练数据“train.dat”

# dput() representation of the training data frame: 5 rows, twelve numeric
# predictors x1..x12 and a character `Class` column with values "No"/"Yes".
# Assign the expression to `train.dat` to recreate the object.
structure(list(x1 = c(-2.48941991263215, -0.338448010439568, 
-1.07796826066294, 1.47833943928667, -0.19013864138727), x2 = c(-1.05660014431803, 
-1.75938416652951, -1.94445363537753, 2.65603302304451, -0.818464313993987
), x3 = c(-0.928819609794076, -0.24431689960579, -0.26055539595143, 
-0.500006066823682, 0.19947842697796), x4 = c(0.167674885884102, 
-0.714651010370962, 0.501841366660604, -0.261356553409404, -0.121081806911108
), x5 = c(0.826293680351228, -0.0522530856542289, 0.456970179919153, 
-0.483860304113477, 0.827117071952671), x6 = c(0.229410925647244, 
0.367363323224708, 0.0097867208532989, 0.6599692159798, 0.454895325470716
), x7 = c(0.277445634594187, 0.00411403737962246, 0.912381467409432, 
0.0911673668306321, 0.0729619956109673), x8 = c(0.403632419444111, 
-1.76177968998027, 0.818339220424296, 0.77257524859948, -1.45634200383022
), x9 = c(0.666298305218494, 1.28068782733132, 0.243489971387096, 
0.00907678612957343, 0.0688231437305274), x10 = c(-0.674113519037765, 
-0.221583500325269, 0.555570222138564, 0.572105515491289, 2.32224808146226
), x11 = c(-0.503906052691753, -0.170463238913734, 1.81239693119702, 
-0.310259330876175, 0.373355276436323), x12 = c(0.569346066655445, 
0.665270271264321, -1.04590277174209, -1.08749423169221, -0.717326819631265
), Class = c("No", "Yes", "Yes", "No", "Yes")), row.names = c(NA, 
5L), class = "data.frame")

测试数据“test.dat”

# dput() representation of the test data frame: 5 rows, twelve numeric
# predictors x1..x12 and a character `Class` column with values "No"/"Yes".
# Assign the expression to `test.dat` to recreate the object.
structure(list(x1 = c(-1.64667008195797, -1.12098964581992, 0.473422701448559, 
-1.60461690923768, -0.00749172927415004), x2 = c(-1.16286992117132, 
-0.141705544905757, 1.51853911670816, -0.424087214057948, 0.377124786278201
), x3 = c(1.19241045039945, 1.48443779149667, -1.88482327525843, 
0.534626743634202, 0.310818572560298), x4 = c(-0.930735300584522, 
-1.7670393982441, -1.14191107118164, 0.61126176594059, 0.155931701957036
), x5 = c(-0.820323897991329, -0.926557129248977, 0.965568253770471, 
-0.599971735384315, -0.512967912014574), x6 = c(0.29261250467971, 
0.269901459803805, 0.9167238867376, 0.000339579302817583, 0.398331164848059
), x7 = c(0.262839384144172, 0.246979274321347, 0.105181680992246, 
0.170856263954192, 0.310768554685637), x8 = c(-0.547301867028384, 
0.888728318998235, 0.289208399599502, -1.37035914659536, -1.25498394079555
), x9 = c(1.91475599789737, -0.663301448358402, 0.73492523418078, 
0.252558835925375, -1.13201069045815), x10 = c(-1.56005878668401, 
0.47042681670553, -0.294760033296374, -3.26819220678081, -0.921147419029862
), x11 = c(0.287604446919617, -0.161270837465456, -2.23402479016399, 
-2.25198777628389, -0.61977925827879), x12 = c(1.21464666058049, 
-0.752948562276805, -1.03149583160279, 0.618987295189923, 0.274782272114187
), Class = c("No", "No", "No", "Yes", "Yes")), row.names = c(NA, 
5L), class = "data.frame")

0 个答案:

没有答案