这是我打算做的事情(对于相当多的变量和数据集):
# Lookup table mapping every predictor name to the group it belongs to.
mygroupdf <- data.frame(
  varname = c("A", "B", "c1", "D2", "E", "F", "g1"),
  group = rep(c(1, 2, 3, 4), times = c(3, 1, 2, 1))
)
> mygroupdf
varname group
1 A 1
2 B 1
3 c1 1
4 D2 2
5 E 3
6 F 3
7 g1 4
此数据框仅包含有关变量分组的信息:
group 1 = A, B, c1
group 2 = D2
group 3 = E, F
group 4 = g1
第二个数据集 - 包含实际数据
# Simulated data: a numeric response (yvar) plus seven binary 0/1 predictors.
set.seed(1234)
dataf <- data.frame(
  yvar = rnorm(10, mean = 10, sd = 3),
  A = sample(c(1, 0), 10, replace = TRUE),
  B = sample(c(1, 0), 10, replace = TRUE),
  c1 = sample(c(1, 0), 10, replace = TRUE),
  D2 = sample(c(1, 0), 10, replace = TRUE),
  E = sample(c(1, 0), 10, replace = TRUE),
  # NOTE(review): no size is supplied here, so TRUE is taken as `size` and a
  # single value is drawn and recycled over all rows. F is therefore constant,
  # which is why correlations involving F come out as NA (zero standard
  # deviation). Kept unchanged so the random stream stays reproducible.
  F = sample(c(1, 0), TRUE),
  g1 = sample(c(1, 0), 10, replace = TRUE)
)
# Manual workout: the predictor for a group is the element-wise product
# (not the sum, as an earlier version wrongly had it) of all member columns
# of that group.
# Each assignment overwrites xtemp, so run only the line for the group being
# examined before computing the correlation.
xtemp <- dataf$A * dataf$B * dataf$c1  # group 1
xtemp <- dataf$D2                      # group 2
xtemp <- dataf$E * dataf$F             # group 3
xtemp <- dataf$g1                      # group 4 (the column is g1, not G)
然后计算该乘积与 yvar 的相关系数:
# Correlation between the response and the current group product.
x <- cor(x = dataf$yvar, y = xtemp)
我想将它包装到一个函数中,以便我可以将它应用于我的数据集中的1000组变量。
# Correlate the product of three group members with the response.
#
# Arguments:
#   x          Not read by the function; kept so existing calls still work.
#   V1, V2, V3 Numeric vectors (the member variables of one group), each
#              aligned with the rows of the global `dataf`.
# Returns:
#   The correlation between dataf$yvar and V1 * V2 * V3.
corrfun <- function(x, V1, V2, V3) {
  # The group term is the product of all members, not a mix of * and +.
  xtemp <- V1 * V2 * V3
  cor(dataf$yvar, xtemp)
}
由于不同的组有不同的变量,我不知道如何构建这样的函数并应用于整个数据集。请帮忙 !
编辑:处理:
答案 0 :(得分:3)
我会猜测......
# Correlation between the response and the row-wise product of all variables
# belonging to one group.
#
# Arguments:
#   group.no  Group identifier to look up in x.lookup$group.
#   x         Data frame holding `yvar` and the predictor columns
#             (defaults to the global `dataf`).
#   x.lookup  Data frame with columns `varname` and `group`
#             (defaults to the global `mygroupdf`).
# Returns:
#   cor() of x$yvar with the product of the group's columns.
corrfun <- function(group.no, x = dataf, x.lookup = mygroupdf) {
  # as.character() matters when varname is a factor: indexing a data frame
  # with a factor uses its integer codes, which would pick columns by
  # position instead of by name.
  members <- as.character(x.lookup$varname[x.lookup$group == group.no])
  xtemp <- apply(x[members], 1, prod)
  cor(x$yvar, xtemp)
}
> corrfun(1)
[1] 0.35593
> corrfun(2)
[1] 0.4181311
>
答案 1 :(得分:0)
另一个答案..
# Two-column matrix: each distinct group id next to the correlation between
# yvar and the row-wise product of that group's variables.
cbind(
  group = unique(mygroupdf$group),
  corr = unlist(lapply(unique(mygroupdf$group), function(grp) {
    in_group <- mygroupdf$group == grp
    members <- unique(mygroupdf[in_group, "varname"])
    # as.matrix() keeps a single selected column two-dimensional for apply().
    picked <- as.matrix(dataf[, colnames(dataf) %in% members])
    cor(apply(picked, 1, prod), dataf$yvar)
  }))
)
给出了
group corr
[1,] 1 0.3559300
[2,] 2 0.4181311
[3,] 3 NA
[4,] 4 -0.1015003
答案 2 :(得分:0)
# One correlation per distinct group. vapply() guarantees a numeric vector
# with exactly one value per group, where sapply() could change its return
# type depending on the input.
vapply(unique(mygroupdf$group), function(x) {
  # a: names of the variables in group x; as.character() so that a factor
  # varname selects columns by name rather than by integer code.
  a <- as.character(mygroupdf$varname[mygroupdf$group == x])
  cor(dataf$yvar, apply(dataf[a], 1, prod))
}, numeric(1))
其中 a 为相应组的变量名称。
答案 3 :(得分:0)
使用我最喜欢的库创建另一个答案:
library(plyr)
# Split the lookup table by group; for each piece, correlate yvar with the
# row-wise product of the columns named in that piece's varname.
ddply(
  mygroupdf,
  "group",
  summarise,
  cor = cor(dataf$yvar, apply(dataf[as.character(varname)], 1, prod))
)
这将产生以下结果:
group cor
1 1 0.3559300
2 2 0.4181311
3 3 NA
4 4 -0.1015003
Warning message:
In cor(dataf$yvar, apply(dataf[as.character(varname)], 1, prod)) :
the standard deviation is zero