# Clear every object from the current workspace, including dot-prefixed ones.
# `all.names` is spelled out in full (no partial argument matching) and uses
# TRUE rather than the reassignable shorthand T.
rm(list = ls(all.names = TRUE))
library(ISLR)
library(glmnet)
# Keep only the rows of Hitters that have no missing values.
Hitters <- na.omit(Hitters)
# Binary problem - logistic regression.
# Recode Salary as a two-level factor: "1" when Salary > 1000, "0" otherwise.
Hitters$Salary <- as.factor(as.integer(Hitters$Salary > 1000))
# the class is unbalanced
# > table(Hitters$Salary)
# 0 1
# 233 30
# cls <- sapply(Hitters, class)
# for(j in names(cls[cls == 'integer'])) Hitters[,j] <- as.double(Hitters[,j])
# Predictor matrix built from every column except the response; the `- 1`
# in the formula removes the intercept column.
x <- model.matrix(~ . - 1, Hitters[, setdiff(names(Hitters), "Salary")])
# Rows 1-200 are used for training; the remaining rows are held out.
inx_train <- seq_len(200)
inx_test <- 201:nrow(Hitters)
y_train <- Hitters$Salary[inx_train]
y_test <- Hitters$Salary[inx_test]
x_train <- x[inx_train, ]
x_test <- x[inx_test, ]
# Cross-validated binomial glmnet fit (alpha = 1), scored by AUC.
fit <- cv.glmnet(
  x = x_train,
  y = y_train,
  family = "binomial",
  type.measure = "auc",
  alpha = 1
)
plot(fit)
# Predict on the held-out rows at the cross-validated `lambda.min`.
pred <- predict(fit, newx = x_test, s = "lambda.min")
quantile(pred)
# 0% 25% 50% 75% 100%
# -5.200853 -3.704760 -2.883836 -1.937052 1.386215
鉴于上述预测结果,我应该使用或修改哪个函数或参数,才能将它们转换为 0 到 1 之间的概率?
答案 0 :(得分:5)
在调用 predict 时,您需要设置参数 type="response"。
根据文档,它返回拟合的概率。
pred <- predict(fit, newx = x_test, s = "lambda.min", type = "response")
此外,如果您只需要分类标签,可以使用 type="class"。