上下文:在R中有一份堆叠(长)格式的数据,即进行ANOVA时所用的格式,如下面的示例子集所示(数据行未按原始顺序排列):
IV_B1 IV_B2 IV_W DV
1 1 1 12
1 1 2 42
1 2 1 25
1 2 2 29
2 1 1 13
2 1 2 49
2 2 1 45
2 2 2 34
目标:我们的目标是计算配对t检验,其中 IV_W 是被试内因子,IV_B1 和 IV_B2 是被试间因子;因此,配对由 IV_W 的两个水平在 IV_B1 与 IV_B2 的每个交叉组合内构成:
Pair one (P1): (IV_B1 = 1, IV_B2 = 1, IV_W = 1), (IV_B1 = 1, IV_B2 = 1, IV_W = 2)
Pair two (P2): (IV_B1 = 1, IV_B2 = 2, IV_W = 1), (IV_B1 = 1, IV_B2 = 2, IV_W = 2)
...
总计共有四个配对:P1 = [(1, 1, 1), (1, 1, 2)],P2 = [(1, 2, 1), (1, 2, 2)],P3 = [(2, 1, 1), (2, 1, 2)],P4 = [(2, 2, 1), (2, 2, 2)](各三元组依次为 IV_B1、IV_B2、IV_W)。因此,对于上面的示例数据,手动命令为 t.test(c(12, 25, 13, 45), c(42, 29, 49, 34), paired=TRUE)。
问题:如何在R中对这种格式的数据进行此类配对t检验,并获取相应的统计量(例如均值差、t值、自由度和p值)?
答案 0 :(得分:2)
以下是使用 reshape 的替代解决方案:
# Example data in long (stacked) format: one row per combination of
# IV_B1, IV_B2 and IV_W, with the measured value in DV.
# All four columns are integer vectors.
df <- data.frame(
  IV_B1 = rep(1:2, each = 4),
  IV_B2 = rep(rep(1:2, each = 2), times = 2),
  IV_W = rep(1:2, times = 4),
  DV = c(12L, 42L, 25L, 29L, 13L, 49L, 45L, 34L)
)
df
#   IV_B1 IV_B2 IV_W DV
# 1     1     1    1 12
# 2     1     1    2 42
# 3     1     2    1 25
# 4     1     2    2 29
# 5     2     1    1 13
# 6     2     1    2 49
# 7     2     2    1 45
# 8     2     2    2 34
# Add an id column identifying each pair.
# A pair is one IV_B1 x IV_B2 cell, so the id is derived from the cell itself
# rather than from row position; this keeps the pairing correct when the rows
# are not sorted. lex.order = TRUE numbers the cells with IV_B1 varying
# slowest: (1,1) = 1, (1,2) = 2, (2,1) = 3, (2,2) = 4.
cell <- interaction(df$IV_B1, df$IV_B2, drop = TRUE, lex.order = TRUE)
# Every cell must contribute exactly one row per IV_W level; otherwise the
# long-to-wide reshape cannot form unambiguous pairs.
if (any(table(cell, df$IV_W) != 1L)) {
  stop("each IV_B1 x IV_B2 cell needs exactly one row per IV_W level",
       call. = FALSE)
}
(df <- cbind(df, id = as.integer(cell)))
#   IV_B1 IV_B2 IV_W DV id
# 1     1     1    1 12  1
# 2     1     1    2 42  1
# 3     1     2    1 25  2
# 4     1     2    2 29  2
# 5     2     1    1 13  3
# 6     2     1    2 49  3
# 7     2     2    1 45  4
# 8     2     2    2 34  4
# Long -> wide: one row per id. Each column listed in v.names is split by the
# level of IV_W, which is appended to the column name as a suffix (.1 / .2).
df.wide <- reshape(
  df,
  direction = "wide",
  idvar = "id",
  timevar = "IV_W",
  v.names = c("IV_B1", "IV_B2", "DV")
)
df.wide
#   id IV_B1.1 IV_B2.1 DV.1 IV_B1.2 IV_B2.2 DV.2
# 1  1       1       1   12       1       1   42
# 3  2       1       2   25       1       2   29
# 5  3       2       1   13       2       1   49
# 7  4       2       2   45       2       2   34
# Paired t-test on the two IV_W columns of the wide data.
# Each statistic is first read from the t.test() result and then re-derived
# by hand on the following line as a cross-check.
tt <- t.test(df.wide$DV.1, df.wide$DV.2, paired = TRUE)
# Per-pair differences used by the manual calculations below
difs <- df.wide$DV.1 - df.wide$DV.2
# Mean difference
(mean_diff <- tt$estimate)
# mean of the differences
#                  -14.75
mean(difs)
# Standard error of the mean difference
(se_mean_diff <- sd(difs) / sqrt(length(difs)))
# [1] 11.04064
# T statistic
# Stored as t_stat: assigning to `T` would mask the built-in TRUE shorthand
# and break any later call written as paired = T.
(t_stat <- tt$statistic)
#         t
# -1.335973
mean_diff / se_mean_diff
# Degrees of freedom
(dof <- tt$parameter)
# df
#  3
# t-test p-value (two-sided)
(pv <- tt$p.value)
# [1] 0.2738612
# Upper tail taken directly instead of 1 - pt(...), which loses precision
# for very small p-values.
2 * pt(abs(t_stat), dof, lower.tail = FALSE)
# 95% confidence interval
(CI <- tt$conf.int)
# [1] -49.88626  20.38626
# attr(,"conf.level")
# [1] 0.95
c(mean_diff - qt(0.975, dof) * se_mean_diff,
  mean_diff + qt(0.975, dof) * se_mean_diff)
答案 1 :(得分:1)
# Paired t-test of DV at IV_W == 1 against DV at IV_W == 2.
# t.test(paired = TRUE) pairs observations by position, so the rows are first
# sorted by IV_B1 and IV_B2; both vectors then list the cells in the same
# order even when the input rows are shuffled.
# NOTE(review): assumes one row per IV_B1 x IV_B2 x IV_W combination --
# confirm against the full data set.
df_sorted <- df[order(df$IV_B1, df$IV_B2, df$IV_W), ]
# P1 / P2 hold the DV values for the first / second level of IV_W
P1 <- df_sorted$DV[df_sorted$IV_W == 1]
P2 <- df_sorted$DV[df_sorted$IV_W == 2]
TT <- t.test(P1, P2, paired = TRUE)
pval <- TT$p.value
mdiff <- TT$estimate
# Degrees of freedom are stored as dof; assigning them to `df` would
# overwrite the data frame used above.
dof <- TT$parameter
tval <- TT$statistic