我有一个性状(trait)数据集,每行对应一个性状。我想对每个性状(每一行)执行t.test,比较数据集中两个特定列的均值,即T_Mean和A_Mean(第2列和第8列)。我尝试用sapply来实现,但没有成功;示例数据集和我的代码如下。非常感谢任何能帮我修复代码的建议!
# Sample summary data: one row per trait, with mean / SD / sample size for
# each of the three groups (T, HZ, A).
WW_Summary <- data.frame(
  Trait = c("Morph PC1", "Morph PC2", "Morph PC3", "Morph PC4",
            "Colour", "Delta15N", "Delta13C"),
  T_Mean = c(-0.000369133942377988, -0.478614374395391, -0.0429785751248986,
             0.141563333908087, 5.09447415329768, 7.79253141831239,
             -20.3678994614004),
  T_SD = c(1.25617540601557, 0.994922039584068, 0.72787412346719,
           0.5683273217636, 1.85452769780342, 1.56401940841295,
           2.33461396773921),
  T_N = c(615, 615, 615, 615, 561, 557, 557),
  HZ_Mean = c(0.379669406453242, 0.307293731157124, -0.0499328976749929,
              -0.0563021988086238, 4.74712643678161, 8.4568926056338,
              -20.8209771126761),
  HZ_SD = c(1.27837645625113, 1.11890593333031, 0.71490087377916,
            0.699316698091669, 1.90101932622734, 1.86547215761457,
            1.9590774632374),
  HZ_N = c(1137, 1137, 1137, 1137, 1131, 1136, 1136),
  A_Mean = c(-0.818704170327851, -0.104449965981942, 0.157885253051751,
             -0.0437302662392194, 4.31320754716981, 9.79891783567134,
             -19.955250501002),
  A_SD = c(1.29535566832773, 0.97478498249366, 0.678515276691309,
           0.563663991917263, 1.63029422418466, 2.06376134152221,
           1.47077203157055),
  A_N = c(527, 527, 527, 527, 530, 499, 499),
  # Keep Trait as a character column regardless of the R version's default.
  stringsAsFactors = FALSE
)
## Asker's attempt: the goal is one t-test per trait (row) comparing T_Mean
## with A_Mean (columns 2 and 8).
## NOTE: sapply() iterates over the two selected columns, so as written this
## runs a one-sample t.test on each column instead of one test per trait.
WW_Summary_T <- data.frame(t(sapply(
  WW_Summary[, c(2, 8)],
  function(column_values) {
    fit <- t.test(column_values, alternative = "two.sided")
    unlist(fit[c("statistic", "parameter", "p.value", "conf.int")])
  }
)))
答案 0 :(得分:3)
我认为你只是用错了apply家族中的函数。
你可以尝试一下,看看它能给你什么吗?
# Apply t.test once per row (MARGIN = 1) to the two values (T_Mean, A_Mean)
# and keep the statistic, degrees of freedom, p-value and confidence interval.
# The misspelled `aslternative` argument was silently swallowed by `...`;
# it is now spelled `alternative` so the intended option is actually passed.
apply(WW_Summary[, c(2, 8)], 1, function(temp) {
  fit <- t.test(temp, alternative = "two.sided")
  unlist(fit[c("statistic", "parameter", "p.value", "conf.int")])
})
@dickoa说得对:上面的做法很可能算错了,因为数据已经是汇总统计量(均值、标准差、样本量),应改用tsum.test。不过,同样的思路仍然适用:
# One tsum.test() per trait (row), built from the summary statistics:
# columns 2:4 hold T_Mean / T_SD / T_N and columns 8:10 hold A_Mean / A_SD / A_N.
# tsum.test() is not defined in this snippet (the other answer loads it from
# the BSDA package).
# NOTE(review): unlist() flattens the whole test result into one vector; the
# full-precision values in the printed output suggest the kept fields end up
# as character strings rather than numerics -- confirm before using them in
# further calculations.
data.frame(cbind(
  WW_Summary[1],
  t(apply(
    WW_Summary[, c(2:4, 8:10)],
    1,
    function(stats) {
      test_result <- tsum.test(
        mean.x = stats[[1]], s.x = stats[[2]], n.x = stats[[3]],
        mean.y = stats[[4]], s.y = stats[[5]], n.y = stats[[6]]
      )
      flattened <- unlist(test_result)
      flattened[c("statistic.t", "parameters.df", "p.value",
                  "conf.int1", "conf.int2")]
    }
  ))
))
# Trait statistic.t parameters.df p.value
# 1 Morph PC1 10.7920944667109 1102.17477516966 6.99739270551733e-26
# 2 Morph PC2 -6.40501752763609 1119.8038108643 2.20872274986877e-10
# 3 Morph PC3 -4.8221965806503 1131.93025335657 1.61345381252079e-06
# 4 Morph PC4 5.51685228304417 1116.04949237415 4.28798959831121e-08
# 5 Colour 7.40032254940697 1083.43427031755 2.71950155801888e-13
# 6 Delta15N -17.6468194524627 923.361537684413 2.79180235004071e-60
# 7 Delta13C -3.47262865160519 949.662208494884 0.000538633884372937
# conf.int1 conf.int2
# 1 0.669552939095012 0.967117133675934
# 2 -0.48878427646537 -0.259544540361528
# 3 -0.2825915783163 -0.119136078036999
# 4 0.119393147514491 0.251194052780122
# 5 0.574117940682135 0.988415271573606
# 6 -2.22952047960588 -1.78325235511202
# 7 -0.645846712936065 -0.179451207860735
答案 1 :(得分:3)
由于您的数据已经按行汇总(只有7个观测值),因此无法直接使用基础R中的t.test函数。
在这种特殊情况下,您可以使用BSDA包中的tsum.test函数。
## Two-sample t-test from summary statistics (mean, SD, n), T vs A,
## one test per trait.
library(BSDA)  ## provides tsum.test(); install.packages("BSDA") if missing
library(plyr)  ## provides ddply() and .()
ddply(WW_Summary, .(Trait), function(d) {
  ## Run the test once per trait and reuse the result for every output
  ## column instead of re-evaluating tsum.test() for each one.
  res <- tsum.test(mean.x = d$T_Mean, s.x = d$T_SD, n.x = d$T_N,
                   mean.y = d$A_Mean, s.y = d$A_SD, n.y = d$A_N)
  data.frame(stat = res$statistic[[1]],
             pval = res$p.value[[1]],
             parameter = res$parameters[[1]],
             confint_lower = res$conf.int[1],
             confint_upper = res$conf.int[2],
             row.names = NULL)
})
## Trait stat pval parameter confint_lower
## 1 Colour 7.4003 2.7195e-13 1083.43 0.57412
## 2 Delta13C -3.4726 5.3863e-04 949.66 -0.64585
## 3 Delta15N -17.6468 2.7918e-60 923.36 -2.22952
## 4 Morph PC1 10.7921 6.9974e-26 1102.17 0.66955
## 5 Morph PC2 -6.4050 2.2087e-10 1119.80 -0.48878
## 6 Morph PC3 -4.8222 1.6135e-06 1131.93 -0.28259
## 7 Morph PC4 5.5169 4.2880e-08 1116.05 0.11939
## confint_upper
## 1 0.98842
## 2 -0.17945
## 3 -1.78325
## 4 0.96712
## 5 -0.25954
## 6 -0.11914
## 7 0.25119
我们还可以绘制误差条来检查平均值之间可能的重叠。
library(ggplot2)  # provides ggplot(), geom_errorbar(), scale_colour_manual()

# Standard error of each group mean: SD / sqrt(N). The arithmetic is
# vectorised, so no per-trait grouping is needed; rows keep their original
# order (the plot orders traits on the x axis by itself).
WW_Summary <- transform(WW_Summary,
                        T_STDERR = T_SD / sqrt(T_N),
                        A_STDERR = A_SD / sqrt(A_N),
                        HZ_STDERR = HZ_SD / sqrt(HZ_N))

# Mean +/- one standard error per trait for groups T (black) and A (red).
# The colour aesthetic is mapped to the group label and the actual colours
# are assigned by name, so the legend cannot get out of sync with the bars.
# geom_errorbar() only uses ymin / ymax, so no `y` aesthetic is supplied.
ggplot(WW_Summary, aes(x = Trait)) +
  geom_errorbar(aes(ymin = T_Mean - T_STDERR, ymax = T_Mean + T_STDERR,
                    colour = "T")) +
  geom_errorbar(aes(ymin = A_Mean - A_STDERR, ymax = A_Mean + A_STDERR,
                    colour = "A")) +
  labs(y = "Mean") +
  scale_colour_manual(values = c("T" = "black", "A" = "red"),
                      breaks = c("T", "A"),  # legend order: T first, then A
                      name = "")