Question

上下文：在R中给出了用于进行ANOVA的格式的堆叠数据，如下面的示例子集所示（数据未按原始顺序排列）：

 IV_B1 IV_B2 IV_W  DV
 1     1     1     12
 1     1     2     42 
 1     2     1     25
 1     2     2     29
 2     1     1     13
 2     1     2     49
 2     2     1     45
 2     2     2     34

目标：我们的目标是计算配对t检验，其中IV_W是被试内因子，IV_B1和IV_B2是被试间因子，因此配对由IV_W定义，并在IV_B1与IV_B2交叉得到的每个组合内进行：

 Pair one (P1): (IV_B1 = 1, IV_B2 = 1, IV_W = 1), (IV_B1 = 1, IV_B2 = 1, IV_W = 2) 
 Pair two (P2): (IV_B1 = 1, IV_B2 = 2, IV_W = 1), (IV_B1 = 1, IV_B2 = 2, IV_W = 2)
 ...

总计： P1 = [(1, 1, 1), (1, 1, 2)]，P2 = [(1, 2, 1), (1, 2, 2)]，P3 = [(2, 1, 1), (2, 1, 2)]，P4 = [(2, 2, 1), (2, 2, 2)]，因此在给定的情况下，手动命令为t.test(c(12, 25, 13, 45),c(42, 29, 49, 34), paired=TRUE)。

问题：如何在R中进行此类配对t检验以获取以下数据：

p值
具有自由度的t值
均值
均值的标准误？

Answer 1

以下是使用reshape的替代解决方案：

# Paired t-test on stacked (long-format) mixed-design data.
# IV_W is the within factor; each IV_B1 x IV_B2 cell forms one pair.
# The script reshapes the data to wide format, runs t.test(), and then
# recomputes every reported quantity by hand as a cross-check.

df <- structure(list(IV_B1 = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), IV_B2 = c(1L, 
1L, 2L, 2L, 1L, 1L, 2L, 2L), IV_W = c(1L, 2L, 1L, 2L, 1L, 2L, 
1L, 2L), DV = c(12L, 42L, 25L, 29L, 13L, 49L, 45L, 34L)), .Names = c("IV_B1", 
"IV_B2", "IV_W", "DV"), class = "data.frame", row.names = c(NA, 
-8L))
df
#   IV_B1 IV_B2 IV_W DV
# 1     1     1    1 12
# 2     1     1    2 42
# 3     1     2    1 25
# 4     1     2    2 29
# 5     2     1    1 13
# 6     2     1    2 49
# 7     2     2    1 45
# 8     2     2    2 34

# Add an id column
# The id is derived from the IV_B1 x IV_B2 cell rather than from the row
# position, so the pairing stays correct even when the rows are shuffled.
# match() against unique() numbers the cells in order of first appearance.
cell <- paste(df$IV_B1, df$IV_B2, sep = ".")
( df <- cbind(df, id = match(cell, unique(cell))) )
#   IV_B1 IV_B2 IV_W DV id
# 1     1     1    1 12  1
# 2     1     1    2 42  1
# 3     1     2    1 25  2
# 4     1     2    2 29  2
# 5     2     1    1 13  3
# 6     2     1    2 49  3
# 7     2     2    1 45  4
# 8     2     2    2 34  4

# From long to wide format   
( df.wide <- reshape(df, idvar = "id", v.names = c("IV_B1", "IV_B2", "DV"), 
                 timevar = "IV_W", direction = "wide") )

#   id IV_B1.1 IV_B2.1 DV.1 IV_B1.2 IV_B2.2 DV.2
# 1  1       1       1   12       1       1   42
# 3  2       1       2   25       1       2   29
# 5  3       2       1   13       2       1   49
# 7  4       2       2   45       2       2   34

# Paired t-test 
# Spell out TRUE: the shorthand T is an ordinary variable and can be reassigned.
tt <- t.test(df.wide$DV.1, df.wide$DV.2, paired = TRUE)

# Calculate differences
difs <- df.wide$DV.1 - df.wide$DV.2

# Mean difference
( mean_diff <- tt$estimate )
# mean of the differences 
#                  -14.75 
mean(difs)  # manual check

# Standard error of the difference
( se_mean_diff <- sd(difs) / sqrt(length(difs)) )
# [1] 11.04064

# T statistic
# Stored as t_stat: assigning to T would mask the TRUE shorthand for the
# rest of the session.
( t_stat <- tt$statistic )
#         t 
# -1.335973 
mean_diff / se_mean_diff  # manual check

# Degrees of freedom
( dof <- tt$parameter )
# df 
#  3 

# t-test p-value 
( pv <- tt$p.value )
# [1] 0.2738612
# Manual check; the lower-tail form avoids the cancellation in 1 - pt(...)
# when the p-value is very small.
2 * pt(-abs(t_stat), dof)

# 95% confidence intervals
( CI <- tt$conf.int )
# [1] -49.88626  20.38626
# attr(,"conf.level")
# [1] 0.95
# Manual check: mean difference +/- t quantile * standard error
c(mean_diff - qt(0.975, dof) * se_mean_diff, 
  mean_diff + qt(0.975, dof) * se_mean_diff)

Answer 2

# Paired t-test of DV between the two IV_W levels, pairing observations that
# share the same IV_B1 x IV_B2 cell. Expects the long-format data frame `df`
# with columns IV_B1, IV_B2, IV_W and DV.

# Sort by the between factors so both halves list the cells in the same
# order, however the rows of `df` happen to be arranged.
sorted <- df[order(df$IV_B1, df$IV_B2, df$IV_W), ]
w1 <- subset(sorted, IV_W == 1)
w2 <- subset(sorted, IV_W == 2)

# Each cell must contribute exactly one observation per IV_W level;
# otherwise the element-wise pairing below would be meaningless.
stopifnot(
  nrow(w1) == nrow(w2),
  all(w1$IV_B1 == w2$IV_B1),
  all(w1$IV_B2 == w2$IV_B2)
)

# P1 / P2: DV values at IV_W == 1 and IV_W == 2, aligned cell by cell.
P1 <- w1$DV
P2 <- w2$DV

TT <- t.test(P1, P2, paired = TRUE)
pval <- TT$p.value
mdiff <- TT$estimate
# Stored as `dof`, not `df`: assigning to `df` would overwrite the input
# data frame and break any re-run of this script.
dof <- TT$parameter
tval <- TT$statistic

# Mean of DV at each IV_W level and the standard error of the mean
# difference.
means <- c(IV_W1 = mean(P1), IV_W2 = mean(P2))
se_mdiff <- sd(P1 - P2) / sqrt(length(P1))

R：混合设计中堆叠数据的成对t检验

2 个答案: