我正在尝试简化下面的代码。我想把一个数据框列表中的每个数据框从长格式转换为宽格式,然后根据数据集中已有的变量,为列表中的每个数据框添加几个新变量。下面的代码可以生成我想要的输出,希望有人能帮我理解如何减少步骤,最好能在一次 lapply 调用中完成。我尝试过几次,结合使用 `with` 语句,但都没有成功。
# dcast() with several value.var columns and setDT() come from data.table.
library(data.table)

# Three long-format data frames with the same layout: 2 IDs x 2 levels of Y,
# 5 rows per (ID, Y) cell, and two random measurement columns b and c.
dflist <- replicate(
  3,
  data.frame(
    ID = rep(c(1, 2), each = 10),
    Y = rep(rep(c(1, 2), each = 5), times = 2),
    b = rnorm(20),
    c = rnorm(20)
  ),
  simplify = FALSE
)

# Step 1: long -> wide. One row per ID; columns b_1, b_2, c_1, c_2 hold the
# mean of b / c for each level of Y.
wide_data <- lapply(dflist, function(x) {
  dcast(setDT(x), ID ~ Y, value.var = c("b", "c"), mean)
})

# Step 2: flag vectors, one per wide table.
# b is flagged when b_1 is below 0.30 or b_2 is above 0.95.
b_flag <- lapply(wide_data, function(x) {
  with(x, ifelse(b_1 < .30 | b_2 > .95, "Flag", NA))
})
# c is flagged when both c_1 and c_2 are negative. The second operand used to
# repeat c_1, which made the `&` redundant; it now tests c_2.
c_flag <- lapply(wide_data, function(x) {
  with(x, ifelse((c_1 < 0) & (c_2 < 0), "Flag", NA))
})

# Step 3: attach the flag vectors to their tables as new columns.
wide_data <- Map(cbind, wide_data, b_flag = b_flag)
wide_data <- Map(cbind, wide_data, c_flag = c_flag)
wide_data
答案 0 :(得分:1)
我同意,只用一个 lapply 会更好:
# Reshape each long data frame to wide and add both flag columns in one pass.
# Requires data.table (setDT, dcast with multiple value.var columns).
wide_data <- lapply(dflist, function(x) {
  # One row per ID; b_1, b_2, c_1, c_2 are the per-Y means of b and c.
  tmp <- dcast(setDT(x), ID ~ Y, value.var = c("b", "c"), mean)
  # NA_character_ (not plain NA) keeps the flag columns character even when
  # no row is flagged; with plain NA, ifelse() would return a logical vector
  # in that case, so the column type would depend on the data.
  tmp$b_flag <- ifelse(tmp$b_1 < .30 | tmp$b_2 > .95, "Flag", NA_character_)
  tmp$c_flag <- ifelse(tmp$c_1 < 0 & tmp$c_2 < 0, "Flag", NA_character_)
  tmp
})
答案 1 :(得分:1)
下面是不使用 dcast 的做法。
此外,您问题中 c 列的条件不明确,请检查它是否正确并编辑您的问题。
library('data.table')

# For every data frame in dflist: average b and c within each (ID, Y) cell,
# then add per-ID flag columns, keeping the data in long format.
df2 <- lapply(dflist, function(d) {
  cell_means <- setDT(d)[, list(b = mean(b), c = mean(c)), by = list(ID, Y)]

  # b_flag: "Flag" for all rows of an ID when its Y == 1 mean of b is below
  # 0.30 or its Y == 2 mean of b is above 0.95.
  cell_means[
    ,
    b_flag := ifelse(
      any(b[Y == 1] < 0.30, b[Y == 2] > 0.95),
      "Flag",
      NA_character_
    ),
    by = ID
  ]

  # c_flag: "Flag" for all rows of an ID when every mean of c is negative.
  cell_means[
    ,
    c_flag := ifelse(all(c < 0), "Flag", NA_character_),
    by = ID
  ]

  cell_means
})

# Stack the per-data-frame results into a single table.
df2 <- rbindlist(df2)
df2
# ID Y b c b_flag c_flag
# 1: 1 1 0.198227292 0.57377712 Flag NA
# 2: 1 2 0.578991810 0.40128112 Flag NA
# 3: 2 1 0.578724225 0.30608932 NA NA
# 4: 2 2 0.619338292 0.35209122 NA NA
# 5: 1 1 0.321089583 -0.83979393 NA NA
# 6: 1 2 -0.341194581 0.52508394 NA NA
# 7: 2 1 0.179836568 -0.02041203 Flag NA
# 8: 2 2 0.482725899 0.17163968 Flag NA
# 9: 1 1 0.003591178 -0.30250232 Flag NA
# 10: 1 2 -0.230479093 0.01971357 Flag NA
# 11: 2 1 -0.038689389 0.35717286 Flag NA
# 12: 2 2 0.691364217 -0.37037455 Flag NA