在 R 中循环运行 TukeyHSD 并只打印显著的结果

时间:2018-08-07 20:39:49

标签: r

我是 R 和编程的新手,所以请多多包涵。我的数据如下:

    Names   Prof_A   Prof_B...............Prof_Z  cond
    Aaliya  2.1      3.1  ................ 2.3     A
    Adam    1.87     2.3  .................2.2     A
    .
    .
    Brett   1.69     2.6...................1.78    B         

等等(大约 1700 条观测)

我想对每个数值列依次做方差分析(ANOVA),然后做 Tukey 检验,并且只打印其中显著的对比(contrast)结果。

    library(car)
    require(graphics)
    # Raise the print limit so long result tables are not cut off.
    options(max.print = 99999)

    # Placeholder only; ANV is overwritten with the ANOVA summary in every iteration.
    ANV <- rep(NA,ncol(total))
    # Divert everything printed below into this file until sink() is called again.
    sink("Anova-Tukey-sig.doc")

    # Iterate over columns 2 .. ncol-1: per the sample data, column 1 holds the
    # names and the last column holds the grouping factor `cond`.
    for (i in 2:(ncol(total)-1)) {
        column <- names(total[i])
        ANV <- summary(aov(total[,i]~cond,data=total))
        posthocresult <- TukeyHSD(aov(total[,i]~cond,data=total))
        print(column)
        print(ANV)
        # NOTE(review): the logical vector has one entry per contrast, but it is
        # used to index the TukeyHSD list itself, not the rows of
        # posthocresult$cond. This is what yields the "named list()" and
        # "$<NA> NULL" output; row-filtering the matrix is the likely intent.
        print(posthocresult[posthocresult$cond[,4]<=.05])
    }
    # Restore normal console output.
    sink()

但是,代码给了我奇怪的输出(下面是部分摘录):

    [1] "Prof_A"
          Df Sum Sq   Mean Sq F value  Pr(>F)   
    cond          25 0.0228 0.0009111   1.864 0.00597 **
    Residuals   1690 0.8262 0.0004889                   
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    named list()
    [1] "Prof_B"
          Df Sum Sq   Mean Sq F value   Pr(>F)    
    cond          25 0.0468 0.0018719   2.889 2.54e-06 ***
    Residuals   1690 1.0949 0.0006479                     
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    $<NA>
    NULL

    $<NA>
    NULL

    $<NA>
    NULL

跳过一些输出

    [1] "Prof_R"
          Df Sum Sq   Mean Sq F value  Pr(>F)    
    cond          25 0.0284 0.0011345   3.404 3.1e-08 ***
    Residuals   1690 0.5633 0.0003333                    
    ---
    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    $cond
            diff           lwr           upr        p adj
    B-A  9.802223e-03  1.237889e-03  0.0183665561 6.894820e-03
    C-A  6.243324e-03 -1.302350e-03  0.0137889986 2.940579e-01
    D-A  1.054579e-02  1.713554e-03  0.0193780265 3.204823e-03
    E-A  4.295824e-03 -3.942431e-03  0.0125340784 9.752564e-01
    F-A  4.607934e-03 -8.302024e-03  0.0175178915 9.999225e-01

等等。有人可以帮忙吗?

1 个答案:

答案 0 :(得分:0)

首先,我认为一个可重现的示例并不需要整个数据集,有一个子集就足够了。

# Download the full data set; the trailing [-1] discards the first CSV column.
total <- read.csv("https://www.dropbox.com/s/trb7d7twm703di8/total.csv?dl=1")[-1]

# Reproducibly pick 30 of the first 400 rows, keeping columns 1-4 and 26.
set.seed(1)
total.sub <- total[sample(1:400, 30), c(1:4, 26)]

# Discard factor levels that no longer occur in the first and last column
# of the subset.
total.sub[c(1, 5)] <- droplevels(total.sub[c(1, 5)])

# Rescale the three numeric columns by 1000 and round to whole numbers so
# the dput() output stays compact.
total.sub[2:4] <- sapply(total.sub[2:4], function(v) round(v * 1000))

# Reset the row names left over from sampling.
rownames(total.sub) <- NULL

创建好子集之后,就可以用 dput() 生成它的文本表示,供其他人直接复制使用:

# Print an R expression that recreates total.sub when evaluated.
dput(total.sub)

# Self-contained copy of the sampled data (dput() output), stored as `total`
# so the analysis below runs without downloading anything.
# Columns: V1 (names), Profession_A..Profession_C (numeric responses),
# cond (grouping factor with levels A, B, C).
total <- structure(list(V1 = structure(c(8L, 13L, 15L, 25L, 5L, 23L, 26L, 17L, 16L, 2L, 6L,
4L, 18L, 30L, 21L, 14L, 19L, 27L, 12L, 20L, 24L, 28L, 29L, 3L, 7L, 11L, 1L, 10L,
22L, 9L), .Label = c("Abigail", "Adriel", "Alden", "Ali", "Aliya", "Aliyah",
"Amari", "Amia", "Andrea", "Annabelle", "Annalise", "Anne", "Ansley", "Aubree",
"Beckham", "Blaise", "Bradley", "Brayden", "Brett", "Bruce", "Bruno", "Camron",
"Cecilia", "Cedric", "Chad", "Charlotte", "Christian", "Clay", "Clementine",
"Coleman"), class = "factor"), Profession_A = c(24, 30, -13, 33, 17, 38, 33, 31,
31, 50, 39, 11, 20, 27, 24, 54, 53, 9, 42, 56, 39, 9, 7, 54, 0, 46, 22, 16, 1, 45),
Profession_B = c(51, 76, 3, 35, 38, 40, 14, 42, 79, 76, 19, 16, 62, 37, 25, 71, 62,
-21, 36, 50, 53, 2, 37, 84, 25, 77, 45, 45, 42, 36), Profession_C = c(29, 26, -8,
29, 16, 47, 21, 34, 42, 56, 21, 4, 32, 31, 24, 55, 37, -3, 36, 43, 49, 14, 19, 52,
4, 51, 26, 28, 33, 45), cond = structure(c(1L, 1L, 2L, 3L, 1L, 3L, 3L, 2L, 2L, 1L,
1L, 1L, 2L, 3L, 2L, 1L, 2L, 3L, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L),
.Label = c("A", "B", "C"), class = "factor")), row.names = c(NA, -30L), class =
"data.frame")

这样,即使有一天这个 Dropbox 链接失效了,任何人仍然可以重现下面的步骤。

# Fit a one-way ANOVA of every response column on `cond`, then run Tukey's HSD
# on each fit. Results are collected in two lists named after the response
# column: anv.l holds the aov fits, hsd.l the TukeyHSD results.

# Response columns are everything except the first and the last column.
# Negative indexing yields an empty set when there are fewer than three
# columns, whereas 2:(ncol(total)-1) would count backwards.
resp.cols <- names(total)[-c(1, ncol(total))]

# Preallocate both result lists with their final names.
anv.l <- setNames(vector("list", length(resp.cols)), resp.cols)
hsd.l <- setNames(vector("list", length(resp.cols)), resp.cols)

for (column in resp.cols) {
    # Build `<column> ~ cond` so each stored model refers to its response by
    # name rather than through the loop index; as.name() keeps non-syntactic
    # column names working.
    ANV <- aov(reformulate("cond", response = as.name(column)), data = total)
    anv.l[[column]] <- ANV

    posthocresult <- TukeyHSD(ANV)
    hsd.l[[column]] <- posthocresult
}

# Coefficients of each fitted model, gathered across the response columns.
coeff <- sapply(anv.l, coef)
# ANOVA summary of each fitted model.
summ <- sapply(anv.l, summary)

# Column 4 of the first component of each TukeyHSD result, rounded to 3 decimals.
# NOTE(review): column 4 is presumably "p adj", matching the Tukey table printed
# in the question; compare against .05 to keep only the significant contrasts.
p.val <- sapply(hsd.l, function(x) round(x[[1]][,4], 3))