如何用最少的代码行,对每个特征的所有可能组对执行t检验?
我的例子:
3个特征:1,2,3
4个组:A,B,C,D
目标:对每个特征,检验所有组对:
1(A-B,A-C,A-D,B-C,B-D,C-D)
2(A-B,A-C,A-D,B-C,B-D,C-D)
3(A-B,A-C,A-D,B-C,B-D,C-D)
= 18次T检验
目前我正在使用ddply和lapply:
library(plyr)
# Simulated example data: 3 features x 4 groups x 10 observations = 120 rows.
# `groupVector` holds the 40 group labels for a single feature.
groupVector <- rep(c("A", "B", "C", "D"), each = 10)
featureVector <- rep(1:3, each = 40)
# Fix: the original passed an undefined `factorVector` here; the feature ids
# live in `featureVector`. The group labels are repeated once per feature
# explicitly rather than relying on silent recycling inside data.frame().
mydata <- data.frame(
  feature = featureVector,
  group = rep(groupVector, times = 3),
  value = rnorm(120, 0, 1)
)
# For each feature, run a two-sample t-test (t.test() defaults) on every
# unordered pair of groups and return one row per pair with its p-value.
ddply(mydata, .(feature), function(feature_rows) {
  group_labels <- unique(feature_rows$group)
  # combn() hands each 2-element combination straight to the function below.
  pair_results <- combn(group_labels, 2, simplify = FALSE, FUN = function(pair) {
    # Keep only the observations belonging to the two groups being compared.
    pair_rows <- feature_rows[feature_rows$group %in% pair, ]
    data.frame(
      groupA = pair[1],
      groupB = pair[2],
      pval = t.test(value ~ group, data = pair_rows)$p.value
    )
  })
  # Stack the per-pair one-row data frames into a single result for ddply.
  do.call("rbind", pair_results)
})
答案 0 :(得分:0)
这是我的做法,不过它是否称得上“更好”还有待商榷:
library(tidyverse)

# One data frame per feature; the list names become the `feature` id column.
split.data <- split(mydata, mydata$feature)
# Each column of `pairs` holds one unordered pair of group labels.
pairs <- as.data.frame(matrix(combn(unique(mydata$group), 2), nrow = 2))

# Outer map: one feature at a time. Inner map: one group pair (a column of
# `pairs`) at a time. Each inner step yields a one-row tibble with the p-value.
map_df(
  split.data,
  function(feature_rows) {
    map_df(pairs, function(pair) {
      # Restrict to the two groups being compared before testing.
      pair_rows <- feature_rows[feature_rows$group %in% pair, ]
      tibble(
        groupA = pair[1],
        groupB = pair[2],
        pval = t.test(value ~ group, data = pair_rows)$p.value
      )
    })
  },
  .id = "feature"
)
输出
# # A tibble: 18 x 4
# feature groupA groupB pval
# <chr> <chr> <chr> <dbl>
# 1 1 A B 0.28452419
# 2 1 A C 0.65114472
# 3 1 A D 0.77746420
# 4 1 B C 0.42546791
# 5 1 B D 0.39876582
# 6 1 C D 0.88079645
# 7 2 A B 0.57843592
# 8 2 A C 0.30726571
# 9 2 A D 0.55457986
# 10 2 B C 0.74871464
# 11 2 B D 0.24017130
# 12 2 C D 0.04252878
# 13 3 A B 0.01355117
# 14 3 A C 0.08746756
# 15 3 A D 0.24527519
# 16 3 B C 0.15130684
# 17 3 B D 0.09172577
# 18 3 C D 0.64206517