我的代码总是报出很多错误。虽然仍能得到一些数值结果,但我不确定它们是否准确。为了修复这些错误,我尝试从头开始、按照我已有的变量定义重新运行,但得到的结果却是空的。
_.chain(values) . groupBy(name) . map(key) . join('&') . value()
model.frame.default(formula = insampley ~ insamplex, data = insample, ...) 中的错误:变量 'insampley' 的类型(list)无效(原始英文信息:invalid type (list) for variable 'insampley')
# Fit an ordinary least-squares baseline on the wine-quality data.
# Rows 1-500 of the NA-free data form the in-sample set and rows 501-1000 the
# out-of-sample set; columns 1-11 are the predictors and `quality` the response.

# The path is assembled with file.path() so that no line break ends up inside
# the string literal (a newline in the literal becomes part of the file name).
contdepdata <- read.delim(
  file.path(
    "/Users/Jkels/Documents/Introduction to Computational Statistics",
    "winequality-red.csv"
  ),
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
contdepdata <- na.omit(contdepdata)
numericvars <- 1:11
dim(contdepdata)
head(contdepdata)

insample <- contdepdata[1:500, ]
outsample <- contdepdata[501:1000, ]

# Define the predictor/response selections before anything refers to them.
x <- insample[, numericvars]
y <- insample$quality

# Predictors are selected by *column* and converted to matrices so they can be
# used both as a matrix term in lm() and in matrix products such as
# cbind(1, outsamplex) %*% lmout$coefficients.
# NOTE(review): assumes columns 1-11 are all numeric -- confirm with str().
insamplex <- as.matrix(x)
outsamplex <- as.matrix(outsample[, numericvars])

# Responses are extracted with `$`, giving plain vectors; a data-frame (list)
# response is what lm() rejects with "invalid type (list)".
insampley <- y
outsampley <- outsample$quality

lambdalevels <- 10^seq(7, -2, length.out = 100)

# Both terms are free-standing objects in the calling environment, so no
# `data` argument is needed.
lmout <- lm(insampley ~ insamplex)
cbind(1, outsamplex) %*% lmout$coefficients 中的错误:需要数值/复数的矩阵或向量参数(原始英文信息:requires numeric/complex matrix/vector arguments)
答案 0 :(得分:1)
我认为下面的代码与你想要的很接近。lm 使用的"公式/数据框"方式与 glmnet 使用的"矩阵"方式之间存在很大差异,我认为这正是让你出错的部分原因。坦率地说,我觉得 glmnet 必须使用矩阵这一点令人失望(不过也许这是必要的——我以前从未用过它)。
library(dplyr)
library(magrittr)
library(glmnet)
# Build a small synthetic data set and split it into two halves.
# `tibble()` replaces the deprecated `data_frame()`. The character column `c`
# stands in for a non-numeric column and is dropped again by selecting the
# wanted columns by name.
data <-
  tibble(
    a = rnorm(10),
    b = rnorm(10),
    c = "delete",
    quality = rnorm(10)
  ) %>%
  select(a, b, quality)

# First five rows: training split.
data.training <-
  data %>%
  slice(1:5)

# Last five rows: test split.
data.test <-
  data %>%
  slice(6:10)
# Baseline model: regress `quality` on every other column of the training
# split, then predict the test split. unname() strips the row labels that
# predict() attaches to its result.
quality.predict <- unname(
  predict(lm(quality ~ ., data.training), data.test)
)

# Mean squared prediction error of the baseline on the test split.
MSE.standard <-
  summarize(
    data.test,
    MSE = sum((quality - quality.predict)^2 / n())
  )$MSE
# Ten candidate penalty values, log-spaced from 1e7 down to 1e-2.
lambda <- 10^seq(7, -2, length.out = 10)

# cv.glmnet() takes a predictor matrix and a response vector instead of a
# formula, so `quality` is removed from the predictors and the remainder is
# converted with as.matrix(). alpha = 1 requests the lasso penalty.
regression.lasso <- cv.glmnet(
  as.matrix(select(data.training, -quality)),
  data.training$quality,
  alpha = 1,
  lambda = lambda
)

# Predict the test split at the cross-validated lambda.min. The result is a
# one-column matrix; `[, 1]` reduces it to a vector.
quality.lasso <- predict(
  regression.lasso$glmnet.fit,
  newx = as.matrix(select(data.test, -quality)),
  s = regression.lasso$lambda.min
)[, 1]
# Mean squared prediction error of the lasso fit.
# `quality.lasso` holds predictions for the test split, so the observed values
# must be taken from `data.test` too; pairing them with `data.training` would
# compare predictions against responses of different rows.
MSE.lasso <-
  data.test %>%
  summarize(MSE = sum((quality - quality.lasso)^2 / n())) %>%
  use_series(MSE)