如何对我的数据应用配对 t 检验(t.test)或 Wilcoxon 检验

时间:2017-02-03 10:27:42

标签: r usage-statistics

让我们从数据开始:

# Example data in dput() form: a data.frame with 4 rows, one per Group
# ("Mark", "Matt", "Tim", "Tom"), and 47 numeric columns named "1" to "47".
# The second row ("Matt") is NA in columns "33" to "47".
# Note: the expression is not assigned to a name here.
structure(list(Group = c("Mark", "Matt", "Tim", "Tom"), `1` = c(0.749552072382562, 
1.06820497349356, 1.00116263663573, 0.864987635002866), `2` = c(1.00839505250436, 
0.796306651704629, 1.02603677593328, 1.00321936833133), `3` = c(0.736638669191169, 
0.973483626272054, 1.14805519301778, 0.899272693725192), `4` = c(0.728882841159455, 
0.871211836418332, 1.0442119745299, 0.859935708928745), `5` = c(0.749552072382562, 
1.06820497349356, 1.00116263663573, 0.864987635002866), `6` = c(1.00839505250436, 
0.796306651704629, 1.02603677593328, 1.00321936833133), `7` = c(0.736638669191169, 
0.973483626272054, 1.14805519301778, 0.899272693725192), `8` = c(0.728882841159455, 
0.871211836418332, 1.0442119745299, 0.859935708928745), `9` = c(0.749552072382562, 
1.06820497349356, 1.00116263663573, 0.864987635002866), `10` = c(1.00839505250436, 
0.796306651704629, 1.02603677593328, 1.00321936833133), `11` = c(0.736638669191169, 
0.973483626272054, 1.14805519301778, 0.899272693725192), `12` = c(0.728882841159455, 
0.871211836418332, 1.0442119745299, 0.859935708928745), `13` = c(0.749552072382562, 
1.06820497349356, 1.00116263663573, 0.864987635002866), `14` = c(1.00839505250436, 
0.796306651704629, 1.02603677593328, 1.00321936833133), `15` = c(0.736638669191169, 
0.973483626272054, 1.14805519301778, 0.899272693725192), `16` = c(0.728882841159455, 
0.871211836418332, 1.0442119745299, 0.859935708928745), `17` = c(0.766036811789943, 
0.871085862829362, 1.02210371210681, 0.937452345474458), `18` = c(1.0357237385154, 
1.02805558505417, 0.946794300033338, 1.04688545274238), `19` = c(0.763210436944137, 
0.801397021884422, 0.952553568039278, 0.990226493248718), `20` = c(0.789338028300063, 
0.822815644347233, 0.958462750269733, 1.04183361434861), `21` = c(0.766036811789943, 
0.871085862829362, 1.02210371210681, 0.937452345474458), `22` = c(1.0357237385154, 
1.02805558505417, 0.946794300033338, 1.04688545274238), `23` = c(0.763210436944137, 
0.801397021884422, 0.952553568039278, 0.990226493248718), `24` = c(0.789338028300063, 
0.822815644347233, 0.958462750269733, 1.04183361434861), `25` = c(0.766036811789943, 
0.871085862829362, 1.02210371210681, 0.937452345474458), `26` = c(1.0357237385154, 
1.02805558505417, 0.946794300033338, 1.04688545274238), `27` = c(0.763210436944137, 
0.801397021884422, 0.952553568039278, 0.990226493248718), `28` = c(0.789338028300063, 
0.822815644347233, 0.958462750269733, 1.04183361434861), `29` = c(0.766036811789943, 
0.871085862829362, 1.02210371210681, 0.937452345474458), `30` = c(1.0357237385154, 
1.02805558505417, 0.946794300033338, 1.04688545274238), `31` = c(0.763210436944137, 
0.801397021884422, 0.952553568039278, 0.990226493248718), `32` = c(0.789338028300063, 
0.822815644347233, 0.958462750269733, 1.04183361434861), `33` = c(0.937894856206067, 
NA, 1.00383773624603, 1.04181193834546), `34` = c(1.03944921519508, 
NA, 0.983868286249464, 1.10409633668759), `35` = c(0.949802513948967, 
NA, 1.06522152108054, 1.04376827636719), `36` = c(0.965871712940006, 
NA, 1.18437146805406, 1.01355356488254), `37` = c(0.937894856206067, 
NA, 1.00383773624603, 1.04181193834546), `38` = c(1.03944921519508, 
NA, 0.983868286249464, 1.10409633668759), `39` = c(0.949802513948967, 
NA, 1.06522152108054, 1.04376827636719), `40` = c(0.965871712940006, 
NA, 1.18437146805406, 1.01355356488254), `41` = c(0.937894856206067, 
NA, 1.00383773624603, 1.04181193834546), `42` = c(1.03944921519508, 
NA, 0.983868286249464, 1.10409633668759), `43` = c(0.949802513948967, 
NA, 1.06522152108054, 1.04376827636719), `44` = c(0.965871712940006, 
NA, 1.18437146805406, 1.01355356488254), `45` = c(0.937894856206067, 
NA, 1.00383773624603, 1.04181193834546), `46` = c(1.03944921519508, 
NA, 0.983868286249464, 1.10409633668759), `47` = c(0.949802513948967, 
NA, 1.06522152108054, 1.04376827636719)), .Names = c("Group", 
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", 
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", 
"24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", 
"35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", 
"46", "47"), row.names = c(NA, 4L), class = "data.frame")

每一行都是我从两组比较中得到的一组比率。我想知道这些比率是否显著不同于 1。因此,我想用标题中提到的两种检验来检验每一行(向量)是否不同于 1。如何将这些检验应用于我的数据?请注意每行的长度可能不同,NA 应被忽略。作为输出,我希望得到一个包含 3 列的表:Group name、p-value t-test、p.value Wilcoxon

有人可以帮助我吗?

1 个答案:

答案 0 :(得分:3)

也许有办法直接按原始数据框的行来处理,但我强烈建议改为按列处理(即使用整洁(tidy)格式的数据框)。

library(dplyr)
library(tidyr)

# `dt` is assumed to hold the original wide data frame: a `Group` column
# plus one numeric column per measurement (NA where a value is missing).

# Reshape straight to long ("tidy") format: one row per group/measurement.
# Selecting `-Group` instead of naming the groups keeps this working for any
# set of group names, and pivoting directly avoids transposing the data frame,
# which would turn every number into text and require converting it back.
# `values_drop_na = TRUE` removes the NAs, so groups may end up with
# different numbers of measurements.
dt3 <- dt %>%
  pivot_longer(
    cols = -Group,
    names_to = "measurement",
    values_to = "value",
    values_drop_na = TRUE
  ) %>%
  rename(name = Group)

# One-sample tests against 1, run once per group. Both functions receive the
# plain numeric vector `value`, so no `data` argument is needed.
# NOTE: repeated values (ties) make wilcox.test() warn and use a normal
# approximation instead of an exact p-value.
# Result: a tibble with columns `name`, `pval_ttest` and `pval_wilc`.
dt3 %>%
  group_by(name) %>%
  summarise(
    pval_ttest = t.test(value, mu = 1)$p.value,      # t-test p-value
    pval_wilc = wilcox.test(value, mu = 1)$p.value   # Wilcoxon p-value
  )


# # A tibble: 4 × 3
#    name   pval_ttest    pval_wilc
#   <chr>        <dbl>        <dbl>
# 1  Mark 4.408038e-09 1.020895e-06
# 2  Matt 6.679416e-06 2.502045e-04
# 3   Tim 1.777060e-02 6.932590e-02
# 4   Tom 2.433548e-01 5.148382e-01

下面补充说明配对 t 检验是如何“理解”您给出的测量值的,以及为什么基于差值和基于比率的检验可能得到不同的结果。请考虑以下示例:

# Paired t-test on two vectors of the same size (before and after treatment).
# It tests whether the mean of the element-wise differences is 0.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
t.test(1:10, 13:4, paired = TRUE)

# Paired t-test
# 
# data:  1:10 and 13:4
# t = -1.5667, df = 9, p-value = 0.1516
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#   -7.331701  1.331701
# sample estimates:
#   mean of the differences 
# -3 


# One-sample t-test on the element-wise differences of the two initial
# vectors: is their mean different from 0?
t.test(x = 1:10 - 13:4, mu = 0)

# One Sample t-test
# 
# data:  1:10 - 13:4
# t = -1.5667, df = 9, p-value = 0.1516
# alternative hypothesis: true mean is not equal to 0
# 95 percent confidence interval:
#   -7.331701  1.331701
# sample estimates:
#   mean of x 
# -3 


# One-sample t-test on the element-wise ratios of the two initial vectors:
# is their mean different from 1?
t.test(x = 1:10 / 13:4, mu = 1)

# One Sample t-test
# 
# data:  1:10/13:4
# t = -0.46036, df = 9, p-value = 0.6562
# alternative hypothesis: true mean is not equal to 1
# 95 percent confidence interval:
#   0.3229789 1.4480623
# sample estimates:
#   mean of x 
# 0.8855206 

你可以看到,配对 t 检验其实就是对差值向量做的单样本 t 检验;之所以能这样做,是因为你有两个长度相同的向量(治疗前和治疗后)。它与对比率向量做的单样本 t 检验并不完全相同。

因此,得到不同的结果是合理的;不过在某些应用中,对比率进行检验更为合适。请查阅您所在领域的相关文献。