汇总 by 函数返回的结果

时间:2019-04-18 20:56:33

标签: r statistics

如何汇总在 by 函数中调用 pairwise.t.test 得到的所有 p 值?具体来说,我想把 HW1、HW2、HW3 和 PG 各组的 pairwise.t.test 结果中的 p.value 提取到一个 data.frame 中(请参见下面的期望输出示例)。实际数据中的分组不止 HW1、HW2、HW3 和 PG 这四个。

# Example data for the question, written as a `structure()` literal
# (looks like `dput()` output). 60 rows, four columns:
#   moda       - factor with levels "HW1", "HW2", "HW3", "PG"
#   replicates - integer replicate id, 1..20 within each time point
#   time       - character time point: "t0", "t14" or "t29"
#   unified    - numeric measurement that the t-tests are run on
# Each moda x time combination has 5 rows.
dflong <- structure(list(moda = structure(c(4L, 4L, 4L, 4L, 4L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 
4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
3L, 3L, 3L, 3L, 3L), .Label = c("HW1", "HW2", "HW3", "PG"), class = "factor"), 
    replicates = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
    12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
    17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
    11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L), time = c("t0", 
    "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", 
    "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t14", 
    "t14", "t14", "t14", "t14", "t14", "t14", "t14", "t14", "t14", 
    "t14", "t14", "t14", "t14", "t14", "t14", "t14", "t14", "t14", 
    "t14", "t29", "t29", "t29", "t29", "t29", "t29", "t29", "t29", 
    "t29", "t29", "t29", "t29", "t29", "t29", "t29", "t29", "t29", 
    "t29", "t29", "t29"), unified = c(1096.6, 1304, 1205.2, 1278.9, 
    1221.3, 1090.7, 1022.7, 1071.3, 808.8, 1093.9, 1710.5, 1541.3, 
    1352.3, 1550.8, 1482.8, 1733.1, 1652.2, 1736.2, 1730.2, 1554, 
    263.7, 283, 302.3, 305.3, 288, 314.4, 369.3, 408.5, 408.5, 
    376.9, 295.9, 299.7, 304, 306.9, 309.3, 300.7, 300.6, 298.3, 
    302, 306, 6.68, 6.58, 6.63, 6.44, 6.55, 5.53, 4.56, 3.58, 
    3.84, 4.67, 6.57, 6.62, 6.62, 6.6, 6.62, 6.69, 6.86, 6.99, 
    6.95, 6.81)), row.names = c(NA, -60L), class = "data.frame")

# Whole-data vectors from the original snippet. They are no longer used by
# `pair()`; they are kept only so that code referring to `yy`/`xx` still runs.
yy <- dflong$unified
xx <- dflong$time

# Pairwise t-tests between time points for ONE subset of `dflong`.
#
# x: a data frame with a numeric `unified` column and a `time` column
#    (the per-`moda` slice that `by()` passes in).
# Returns the "pairwise.htest" object from `pairwise.t.test()`; its `$p.value`
# element is the matrix of Bonferroni-adjusted p-values.
#
# The original version ignored `x` and tested the global `yy`/`xx`, so every
# group received the same result computed on the whole data set.
# `pairwise.t.test()` also has no `data` argument, so `data = x` never
# restricted the test to the subset.
pair <- function(x) {
  pairwise.t.test(x$unified, x$time, p.adjust.method = "bonferroni")
}

# Split the full data frame (not just the `unified` vector) by `moda`, so each
# call to `pair()` sees the `time` labels that belong to its own rows.
o <- by(dflong, dflong$moda, FUN = pair)

这是我期望的输出格式;如果这种格式难以用脚本实现,其他形式也可以。

           HW1      HW2       HW3      PG
t0-t0      1        1        1          1
t0-t14     0.156    0.145    0.487      0.156
t0-t29     0.487    0.789    0.487      0.687
t14-t14    1        1        1          1
t14-t29    0.147    0.148    0.125      0.156
t29-t29    1        1        1          1

1 个答案:

答案 0 :(得分:2)

使用 tidyverse 包可以实现(代码较长):

library(tidyverse)
# Build a table of t-test p-values: one row per pair of time points, one
# column per `moda` level.
#
# NOTE: each p-value comes from a separate two-sample `t.test()` call with its
# default settings, and no multiple-comparison adjustment is applied. These are
# therefore not the Bonferroni-adjusted values `pairwise.t.test()` would report.

# Step 1: one row per (moda, time), with the measurements in a list-column.
# `nest()` is called without `.key`: the default column name is already
# "data", and passing the bare symbol `.key = data` was deprecated in
# tidyr 1.0.0. `ungroup()` makes the result ungrouped regardless of whether
# the installed tidyr keeps the grouping after `nest()`.
nested <- dflong %>%
  as_tibble() %>%
  select(-replicates) %>%
  group_by(moda, time) %>%
  nest() %>%
  ungroup()

# Step 2: self-join within each `moda` to enumerate every pair of time points,
# keep each unordered pair once (self-pairs such as t0-t0 included), run the
# test and reshape to wide format.
nested %>%
  left_join(nested, by = "moda", suffix = c("_1", "_2")) %>%
  # Compare the numeric part of the labels so that "t14" sorts before "t29".
  filter(
    as.numeric(gsub("t", "", time_1)) <= as.numeric(gsub("t", "", time_2))
  ) %>%
  mutate(time = paste(time_1, time_2, sep = "-")) %>%
  # After dropping `replicates`, `unified` is the only nested column.
  mutate(
    pval = map2_dbl(
      data_1, data_2,
      ~ t.test(.x[["unified"]], .y[["unified"]])$p.value
    )
  ) %>%
  select(moda, time, pval) %>%
  spread(moda, pval)

# A tibble: 6 x 5
     time          HW1          HW2          HW3           PG
*   <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1   t0-t0 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2  t0-t14 1.154305e-04 2.874392e-05 2.544053e-06 6.916064e-06
3  t0-t29 4.647212e-05 1.243158e-05 1.202839e-06 4.619454e-06
4 t14-t14 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
5 t14-t29 2.746895e-05 2.655776e-08 1.945818e-09 2.955209e-06
6 t29-t29 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00