我正在尝试在第三个值的组代码之后的两列上执行t.test或wilcox.test。 这是我的数据
dput(data1)
structure(list(moda = structure(c(20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L,
22L, 22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 23L, 23L, 23L,
23L, 23L), .Label = c("ACN1", "ACN2", "BA", "BM", "BS1", "BS2",
"CN", "EK5", "HW1", "HW2", "HW3", "L27", "L5K", "LC", "M2K",
"M630", "PB1", "PB2", "PB3", "PG", "RMB", "RMC", "RMM"), class = "factor"),
epicotyle = c(1.5, 1.5, 2, 1, 1.5, 1.2, 1, 2.4, 1.3, 1.4,
1.7, 2, 1.8, 2.3, 2.5, 2.5, 1.5, 1.5, 2, 1.3, 1.5, 1.8, 1.3,
1.8, 1.7, 1.5, 2.3, 1.8, 2.2, 1.5, 1.5, 1.5, 1.3, 1.5, 1.5,
1.5, 1.5, 1.8, 1.5, 2.1, 1.8, 1.3, 2, 1.5, 2, 3.5, 1.5, 1.7,
1.7, 2, 1.7, 2, 1.5, 2, 1.5, 2, 2, 1.5, 2, 1.5, 1.8, 1, 2,
3, 1.6, 1.5, 1.5, 1.3, 1.5, 1.5, 1.2, 1.5, 1.5, 1, 1.2, 1.5,
1.5, 1.5, 1.5, 2, 1.1, 1.5, 1.5, 1.7, 1.8, 1.5, 1.3, 1.5,
1.5, 2.5, 1.2, 1.4, 1, 1.5, 2, 1.5, 1.2, 1.5, 2, 2.3, 2.1,
2, 2.4, 1.5, 1.7, 1.4, 2.4, 1, 1, 2, 1.5, 1.2, 2.4, 1.2,
1, 0.8, 1.8, 1.5, 1.5, 2, 1, 1.5, 1.2, 1, 2.4, 1.3, 1.4,
1.5, 1.5, 1.5, 2.1, 1.5, 1.4, 1.5, 1.3, 1.5, 3, 2.6, 1.5,
2.2, 1.9, 1.5, 1.4, 1.4, 2.5, 2.1, 2, 1.5, 2, 2, 2, 1.5,
2.1, 2, 1.5, 2.5, 2.5, 3, 3, 3.5, 3.5, 3, 2, 2.5, 3.5, 1,
1.2, 1.5, 2.5, 1.5, 1.5, 1.5, 1.5, 1.5, 2.4, 1.5, 2, 3, 1.7,
3, 2.5, 2, 2.5, 2.5, 2.5, 1.5, 1.5, 1.5, 1, 1.5, 2, 1.4,
1.2, 1.7, 2.1, 1.5, 2, 1.5, 1.5, 2, 1.4, 2, 3, 2, 2, 1.5,
1.5, 2, 1, 1.5, 1.2, 1, 2.4, 1.3, 1.4, 2, 2.5, 3, 3, 1.7,
3, 1.8, 2, 1.8, 2.2, 2.3, 1.5, 2, 1.8, 1.8, 1.3, 2, 1.8,
1.8, 2, 1.8, 1.5, 1.7, 2, 1.4, 1.5, 1.7, 1.5), hypocotyle = c(1.5,
1.5, 2, 1, 1.5, 1.2, 1, 2.4, 1.3, 1.4, 5, 7, 2.5, 6.5, 5.4,
5, 6, 5.7, 7, 5.5, 5.7, 5.5, 7, 6.5, 5.5, 5.5, 6.7, 4.9,
5.3, 6.7, 5.8, 6.5, 6, 5.6, 5, 5.5, 6, 6, 6, 3.5, 4.7, 4.5,
5.9, 5, 6, 7, 6, 5.5, 5, 5.8, 5.5, 5.5, 4.8, 5.7, 6, 7, 5.2,
5, 5.2, 5.3, 5.6, 5, 5.3, 6, 5, 5.5, 4.5, 5.7, 6, 4.5, 4.4,
5.2, 5.2, 4.1, 5.2, 5.2, 5.4, 6, 5.5, 6.5, 5, 6, 5.5, 7.5,
5.2, 5.6, 5.4, 5.5, 5, 5, 6, 5.2, 6, 6.3, 6.3, 4.2, 5.1,
3.5, 6, 6, 6, 6, 5, 5, 6, 5, 5.6, 5.5, 5, 5, 6, 5.2, 6, 6.3,
6.3, 4.2, 5.1, 3.8, 4, 7, 5, 6, 4, 5.4, 3.5, 3.6, 5, 6, 4.8,
4.7, 4.4, 5.5, 3.5, 5.3, 4.3, 5.5, 4.5, 5.5, 4.2, 6, 4.3,
4, 4.7, 3.5, 3.7, 4.2, 5, 5, 5.1, 5.7, 5, 3.5, 4, 5.6, 3.9,
3.5, 7, 6, 6, 6, 6.5, 5.5, 4.5, 6.5, 6.5, 3, 5, 5.5, 5.3,
4, 5.5, 6, 4, 5.5, 6, 5, 4, 4.5, 4.5, 4, 3.5, 4.5, 5, 4,
4.5, 5, 4.7, 6, 3.8, 4.5, 4.1, 4, 3.7, 4, 4.5, 5, 6, 4.5,
6, 5.7, 3.7, 5.8, 6.2, 5.5, 5, 3.8, 4, 7, 5, 6, 4, 5.4, 3.5,
3.6, 5, 7, 6.5, 8, 6.5, 5.7, 7.5, 7.3, 7.4, 7, 5.4, 6.5,
6.5, 7.2, 7.4, 6, 6.5, 6, 7, 6, 7, 6.5, 6.5, 6.5, 8, 5.7,
6.5, 6, 7)), class = "data.frame", row.names = c(NA, -243L
))
bartl <- function(p, yy=data1[,1]){barte <- bartlett.test(p,yy)}
aggre <- function (x, y=data1[,1]) aggregate(formula =x~data1[,1],data = data1, FUN = t.test)
lshap <- lapply(data1[-1], FUN = aggre, y=data1[,1])
lshap
或者我也尝试过其他东西
result <- by(data1[-1], data1[,1],
function(x) t.test(data1[,2], data1[,3], mu=0, alt="two.sided", paired = TRUE, conf.level = 0.95))
result$p-value
ttest.pval <- sapply(result, '[[', 'p.value')
ttest.pval
但是看起来它并不在乎我的组的代码,因此pvalue相同。
我的最终目标是创建一个脚本,该脚本在根据shapiro测试pvalue进行先前的均方差检验后,给出正确的t.test pvalue或wilcox。
答案 0 :(得分:0)
只需使用传递给x
函数的by
变量,该变量是子集数据帧。您会忽略使用它,因此会得到重复的结果。为了清楚起见,下面为 subdata 重命名x
并调整返回对象:
result_list <- by(data1, data1$moda, function(sub_data) {
res <- t.test(sub_data$epicotyle, sub_data$hypocotyle, mu=0, alt="two.sided",
paired = TRUE, conf.level = 0.95)
output <- c(t_stat=unname(res$estimate), p_value=res$p.value)
})
result_matrix <- do.call(rbind, result_list)
result_matrix
# t_stat p_value
# ACN1 -3.355556 1.562532e-06
# ACN2 -2.488889 1.711867e-05
# BA -3.480000 8.156509e-08
# BM -4.070000 4.481620e-08
# BS1 -3.411111 2.882650e-06
# BS2 -3.788889 6.311561e-08
# CN -3.930000 1.464457e-07
# EK5 -2.830000 3.638520e-07
# HW1 -3.588889 5.753743e-05
# HW2 -4.090000 4.047017e-08
# HW3 -4.400000 4.571253e-10
# L27 -2.900000 4.274705e-06
# L5K -3.370000 5.018480e-07
# LC -2.030000 2.347509e-05
# M2K -2.890000 4.196188e-07
# M630 -3.450000 2.544427e-08
# PB1 -3.500000 3.465025e-10
# PB2 -3.720000 2.646581e-09
# PB3 -4.280000 4.970850e-09
# PG -2.166667 4.627951e-07
# RMB -4.677778 4.395313e-08
# RMC -4.600000 1.166993e-08
# RMM -4.955556 3.320310e-09