相对于两个实验,我有一个包含两组值(value_1和value_2)的df。
一个实验包含两个组(0和1),另一个包含三个组(0,1,2)。
test group Value_1 Value_2
AA 0 15.1 11.2
AA 0 12.4 8.6
AA 1 9.6 22.5
AA 1 10.2 22
BB 0 12.11 11
BB 0 14 1.2
BB 1 11 13.2
BB 1 12.3 9
BB 2 14.2 12
BB 2 15 13
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4,
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2,
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test",
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA,
-10L))
我想通过测试,按小组 - 对value_1和value_2应用排列测试 - 涵盖:
到目前为止我做了什么 - 分成几部分:
当这些组只有两个时,我只需应用oneway.test():
df %>%
filter(test %in% 'AA') -> df_test_aa
df_test_aa_value_1 <- oneway.test(df_test_aa$Value_1~df_test_aa$group)
df_test_aa_value_1$p.value
[1] 0.2011234
df_test_aa_value_2 <- oneway.test(df_test_aa$Value_2~df_test_aa$group)
df_test_aa_value_2$p.value
[1] 0.05854026
每当群组超过2时,我会测试所有可能的排列:
前0对1:
df %>% filter(test %in% 'BB' & group %in% c(0,1)) -> df_test_bb_01
df_test_bb_01_value_1 <- oneway.test(df_test_bb_01$Value_1~df_test_bb_01$group)
df_test_bb_01_value_1$p.value
[1] 0.3585415
df_test_bb_01_value_2 <- oneway.test(df_test_bb_01$Value_2~df_test_bb_01$group)
df_test_bb_01_value_2$p.value
[1] 0.4848446
然后0 vs 2:
df %>%
filter(test %in% 'BB' & group %in% c(0,2)) -> df_test_bb_02
df_test_bb_02_value_1 <- oneway.test(df_test_bb_02$Value_1~df_test_bb_02$group)
df_test_bb_02_value_1$p.value
[1] 0.3246012
df_test_bb_02_value_2 <- oneway.test(df_test_bb_02$Value_2~df_test_bb_02$group)
df_test_bb_02_value_2$p.value
[1] 0.4142838
然后1 vs 2:
df %>%
filter(test %in% 'BB' & group %in% c(1,2)) -> df_test_bb_12
df_test_bb_12_value_1 <- oneway.test(df_test_bb_12$Value_1~df_test_bb_12$group)
df_test_bb_12_value_1$p.value
[1] 0.08105404
df_test_bb_12_value_2 <- oneway.test(df_test_bb_12$Value_2~df_test_bb_12$group)
df_test_bb_12_value_2$p.value
[1] 0.6245713
因此,我希望获得一个看起来像这样的df:
test value p_value_2sided hypothesis
AA Value_1 0.201 0,1
AA Value_2 0.059 0,1
BB Value_1 0.359 0,1
BB Value_1 0.325 0,2
BB Value_1 0.081 1,2
BB Value_2 0.485 0,1
BB Value_2 0.414 0,2
BB Value_2 0.625 1,2
感谢您的提示!
答案 0 :(得分:2)
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4,
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2,
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test",
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, -10L))
library(tidyverse)
# reshape dataset
df2 = df %>% gather(value, v, -test, -group)
# function to compute p value
# vectorized version
f = function(t,val,x1,x2) {
(df2 %>%
filter(test == t & value == val & group %in% c(x1,x2)) %>%
oneway.test(v~group, data = .))$p.value }
f = Vectorize(f)
df2 %>%
distinct(test, group, value) %>% # get unique combinations
group_by(test, value) %>% # for each test and value
nest() %>% # nest rest of columns
mutate(d = map(data, ~data.frame(t(combn(.$group, 2)))),
hypothesis = map(d, ~paste0(.$X1, ",", .$X2))) %>% # get pairs/combinations of values
unnest(d, hypothesis) %>% # unnest data
mutate(pval = f(test, value, X1, X2)) # apply vectorised function to get p value
# # A tibble: 8 x 6
# test value hypothesis X1 X2 pval
# <fctr> <chr> <chr> <int> <int> <dbl>
# 1 AA Value_1 0,1 0 1 0.201
# 2 BB Value_1 0,1 0 1 0.359
# 3 BB Value_1 0,2 0 2 0.325
# 4 BB Value_1 1,2 1 2 0.0811
# 5 AA Value_2 0,1 0 1 0.0585
# 6 BB Value_2 0,1 0 1 0.485
# 7 BB Value_2 0,2 0 2 0.414
# 8 BB Value_2 1,2 1 2 0.625
如果您确实不需要,可以删除X1
和X2
。
但是,通过这种方式,您(也)将它们作为单独的数字变量,以防您在分析的后期阶段在另一个过程中使用它们(例如,对特定组进行过滤)。
答案 1 :(得分:1)
嗯,这不是很好但是......
RECORD_AUDIO