在R中的组之间进行检验

时间:2018-07-26 09:37:48

标签: r dplyr tidyr

此问题与另一个问题类似:"Error in match.arg(p.adjust.method) : 'arg' must be NULL or a character vector"。在那个问题中,wilcox.test是针对每个id分别计算的,并以group作为分组变量。那里给出的这个解决方案对我很有帮助:

# For every id, run pairwise Wilcoxon tests of var between the groups,
# tidy the result, and attach per-(id, group) summary statistics.
mydat %>% 
  group_by(id) %>%
  do({
    # FALSE is spelled out: F is an ordinary variable that can be reassigned.
    with(., pairwise.wilcox.test(var, group, exact = FALSE)) %>% broom::tidy()
  }) %>% 
  # Convert the group labels to numbers; going through character makes a
  # factor yield its labels rather than its internal integer codes.
  mutate(group1 = as.numeric(as.character(group1)), 
         group2 = as.numeric(as.character(group2))) %>%
  # Add a row for every group value that does not appear as group1.
  complete(group1 = mydat$group) %>%
  # Join mean, sd and median of each (id, group), matching group1 to group.
  left_join(mydat %>% group_by(id,group) %>% summarise_all(c("mean", "sd", "median")), 
            by=c('id', 'group1'='group'))

如何改用cor.test(Spearman)做同样的事情? 我不需要summarise_all(c("mean", "sd", "median"))这一步;作为输出,我需要的是各组之间的相关系数(cor coef):组1的var与组2的var的相关、组1的var与组3的var的相关,依此类推。

输出

id  group1  group2  cor coef
<int>   <dbl>   <dbl>   
1   1.00    NA      NA
1   2.00    1.00    0,1
1   3.00    1.00    0,1
1   3.00    2.00    0,1
2   1.00    NA      NA
2   2.00    1.00    0,1
2   3.00    1.00    0,1
2   3.00    2.00    0,1

这里的数据

# Example data: 2 ids x 3 groups x 4 observations of the integer variable var.
# Every (id, group) cell holds the same four values 23, 24, 24, 23.
mydat <- data.frame(
  id = rep(1:2, each = 12L),
  group = rep(rep(1:3, each = 4L), times = 2L),
  var = rep(c(23L, 24L, 24L, 23L), times = 6L)
)

1 个答案:

答案 0 :(得分:1)

下面是一个创建pairwise.cor.test的函数。您可以用它替换代码中的pairwise.wilcox.test,这样就能得到所需的输出。我没有检查所有可能出错的情况,因此在投入生产之前请先进行测试:

#' Pairwise correlation tests between the levels of a grouping variable
#'
#' Splits `x` by the levels of `g`, runs `cor.test()` on every pair of
#' groups and collects the p-values in the table built by
#' `pairwise.table()`, adjusted with `p.adjust.method`.
#'
#' @param x Vector of observations.
#' @param g Grouping vector; it is converted with `factor()`.
#' @param p.adjust.method Multiple-testing adjustment, one of
#'   `p.adjust.methods`.
#' @param method Correlation type: "pearson", "kendall" or "spearman".
#' @param ... Further arguments forwarded to `cor.test()`.
#' @return A list of class "pairwise.htest" with the elements `method`,
#'   `data.name`, `p.value` and `p.adjust.method`.
pairwise.cor.test <- function (x, g, p.adjust.method = p.adjust.methods, method = c("pearson", "kendall", "spearman"), ...)
{
  method <- match.arg(method)
  p.adjust.method <- match.arg(p.adjust.method)

  # Capture the caller's expressions before g is overwritten below.
  data_label <- paste(deparse(substitute(x)), "and", deparse(substitute(g)))

  g <- factor(g)
  group_code <- as.integer(g)

  # p-value of the correlation test between the groups coded i and j.
  # NOTE(review): cor.test() pairs the two vectors by position, so the
  # groups presumably must have equal sizes and a meaningful order - confirm.
  pair_p_value <- function(i, j) {
    first <- x[group_code == i]
    second <- x[group_code == j]
    cor.test(first, second, method = method, ...)$p.value
  }
  p_table <- pairwise.table(pair_p_value, levels(g), p.adjust.method)

  # Human-readable name of the selected correlation method.
  method_label <- switch(method,
    pearson = "Pearson's product-moment correlation",
    kendall = "Kendall's rank correlation tau",
    spearman = "Spearman's rank correlation rho"
  )

  structure(
    list(method = method_label, data.name = data_label, p.value = p_table,
         p.adjust.method = p.adjust.method),
    class = "pairwise.htest"
  )
}

我实际上只是改编了pairwise.wilcox.test中的代码。如果对您的数据运行此函数,会收到很多警告,因为数据中存在很多结(ties,即相同的值),而且几乎没有实际的变异,因此我在调用中添加了exact=FALSE以避免进行精确计算。

# For every id, run pairwise Spearman correlation tests of var between the
# groups, tidy the p-value table, and attach per-(id, group) summaries.
mydat %>% 
    group_by(id) %>%
    do({
        # exact=FALSE travels through ... of pairwise.cor.test to cor.test.
        with(., pairwise.cor.test(var, group, method="spearman", exact=FALSE)) %>% broom::tidy()
    }) %>% 
    # Convert the group labels to numbers; going through character makes a
    # factor yield its labels rather than its internal integer codes.
    mutate(group1 = as.numeric(as.character(group1)), 
           group2 = as.numeric(as.character(group2))) %>%
    # Add a row for every group value that does not appear as group1.
    tidyr::complete(group1 = mydat$group) %>%
    # Join mean, sd and median of each (id, group), matching group1 to group.
    left_join(mydat %>% group_by(id,group) %>% summarise_all(c("mean", "sd", "median")), 
              by=c('id', 'group1'='group'))

这产生

# A tibble: 8 x 10
# Groups:   id [?]
     id group1 group2 p.value var_mean var2_mean var_sd var2_sd var_median var2_median
  <int>  <dbl>  <dbl>   <dbl>    <dbl>     <dbl>  <dbl>   <dbl>      <dbl>       <dbl>
1     1      1     NA      NA     23.5    -0.990  0.577   0.937       23.5     -0.824 
2     1      2      1       0     23.5     0.551  0.577   0.799       23.5      0.523 
3     1      3      1       0     23.5    -0.548  0.577   0.693       23.5     -0.243 
4     1      3      2       0     23.5    -0.548  0.577   0.693       23.5     -0.243 
5     2      1     NA      NA     23.5    -0.532  0.577   1.83        23.5     -1.26  
6     2      2      1       0     23.5    -0.475  0.577   1.15        23.5     -0.367 
7     2      3      1       0     23.5     0.161  0.577   1.28        23.5      0.0778
8     2      3      2       0     23.5     0.161  0.577   1.28        23.5      0.0778

更新: 如果只想得到带有p值的输出,可以省去后面的数据整理步骤:

# For every id, run pairwise Spearman correlation tests of var between the
# groups and return only the tidied p-value table.
mydat %>% 
    group_by(id) %>%
    do({
        # exact=FALSE travels through ... of pairwise.cor.test to cor.test.
        with(., pairwise.cor.test(var, group, method="spearman", exact=FALSE)) %>% broom::tidy()
    }) 

给出

# A tibble: 6 x 4
# Groups:   id [2]
     id group1 group2 p.value
  <int> <fct>  <chr>    <dbl>
1     1 2      1            0
2     1 3      1            0
3     1 3      2            0
4     2 2      1            0
5     2 3      1            0
6     2 3      2            0

我今晚晚些时候会把这个函数添加到MESS包中,以便长期保存。