我想把模型返回的系数名称拆分为变量名和因子变量的水平(level)两部分。
我的字符串(抱歉:已修改,使其更具代表性):
# Example coefficient names in the format a model returns them: the
# intercept, a numeric term, and factor terms with the level appended.
vars <- c(
  "(Intercept)",
  "wt",
  "gearyy",
  "cyl4",
  "cyl8"
)
预期结果(返回列表也可以):
# [,1] [,2]
#[1,] "wt" ""
#[2,] "gear" "yy"
#[3,] "cyl" "4"
#[4,] "cyl" "8"
我的尝试:我以为可以用 grep 做部分匹配搜索(但没有成功):
# Failing attempt: "\\b" only matches at a transition between a word
# character and a non-word character. Letters and digits are both word
# characters, so in a name like "cyl4" there is no boundary after "cyl"
# and the pattern does not match it.
grep(paste0("\\b", "cyl", "\\b") , est$vars )
模型:
# Build a design matrix from mtcars, fit glmnet on mpg, and collect the
# coefficients stored in the sparse result at s = 0.5 into a data frame.
library(glmnet)

# Recode the categorical columns as factors (gear and am get custom labels).
mtcars$gear <- factor(mtcars$gear, labels = c("xx", "yy", "zz"))
mtcars$am <- factor(mtcars$am, labels = c("yes", "no"))
mtcars$cyl <- factor(mtcars$cyl)

# contrasts = FALSE requests an indicator column for every level of each
# factor instead of dropping a reference level.
x <- model.matrix(
  ~ wt + disp + gear + am + cyl,
  data = mtcars,
  contrasts.arg = lapply(
    mtcars[vapply(mtcars, is.factor, logical(1))],
    contrasts,
    contrasts = FALSE
  )
)

fit <- glmnet(x, mtcars$mpg)
cfs <- coef(fit, s = 0.5)

# cfs@i is shifted by one before indexing the row names -- presumably
# because the sparse matrix stores zero-based row indices (TODO confirm).
est <- data.frame(
  vars = cfs@Dimnames[[1]][cfs@i + 1],
  est = cfs@x,
  stringsAsFactors = FALSE
)
答案 0 :(得分:3)
尝试
# Split each coefficient name into the originating variable name (v1) and
# the factor level (v2). The first element of vars (the intercept) is dropped.
coef_names <- vars[-1]

# Anchor the alternation at the start so a column name is only stripped when
# it is a prefix of the coefficient name, not when it occurs inside a level.
pat <- paste0("^(", paste(colnames(mtcars), collapse = "|"), ")")
v2 <- sub(pat, "", coef_names)

# Remove the level suffix by length rather than by using the levels as a
# regex: levels may contain regex metacharacters or text that also appears
# inside a variable name.
v1 <- substr(coef_names, 1, nchar(coef_names) - nchar(v2))
data.frame(v1, v2)
# v1 v2
#1 wt
#2 gear yy
#3 cyl 4
#4 cyl 8
根据提问者(OP)的评论,更稳妥的写法是:
# Recover the variable name by cutting the level suffix off each coefficient
# name by length. This avoids treating the levels as regex patterns (they may
# contain metacharacters) and only ever removes text from the end. vars[-1] is
# used so that v1 lines up element-wise with v2, which is derived from
# vars[-1].
v1 <- substr(vars[-1], 1, nchar(vars[-1]) - nchar(v2))