R tables::tabular() 输出中的多个不分组列

时间:2016-11-03 16:54:31

标签: r

我有一个data.frame,想用tables::tabular()把它排成适合在LaTeX中漂亮打印的表格。其中有5个条目在两个组(normal和compress)中各重复出现一次;我希望其中三列(id、description、IPR.group)不分组、只显示一次,其余各列则按which分组。

# Reproducible example data (dput() output): a 10-row data.frame in which the
# same five GO ids appear once with which == "normal" and once with
# which == "compress". `id` and `which` are factors; `description`,
# `IPR.group`, `metal` and `sig` are character vectors.
test_table <- structure(list(id = structure(c(2L, 3L, 5L, 1L, 4L, 2L, 3L, 5L, 
1L, 4L), .Label = c("GO:0005525", "GO:0005634", "GO:0008270", 
"GO:0019001", "GO:0046914"), class = "factor"), description = c("nucleus", 
"zinc ion binding", "transition metal ion binding", "GTP binding", 
"guanyl nucleotide binding", "nucleus", "zinc ion binding", "transition metal ion binding", 
"GTP binding", "guanyl nucleotide binding"), IPR.group = c("H", 
"W", "W", "AE", "AE", "H", "W", "W", "AE", "AE"), consistent = c(TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), p = c(4.92245771293119e-05, 
1.08157386873641e-21, 2.06049782601929e-14, 0.999999999562468, 
0.999999999985399, 1, 1, 0.999999999999996, 6.51428091733489e-09, 
2.3200965815753e-10), padjust = c(0.0166308749872604, 8.52640733187206e-19, 
1.2182693396339e-11, 1, 1, 1, 1, 1, 9.06251433499824e-07, 3.91930601101827e-08
), metal = c("zn", "zn", "zn", "mg", "mg", "ca", "ca", "ca", 
"ca", "ca"), perc = c(0.841726618705036, 0.831807780320366, 0.519281914893617, 
0.875598086124402, 0.876651982378855, 0, 0, 0, 0, 0), sig = c("TRUE", 
"TRUE", "TRUE", "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", 
"TRUE", "TRUE"), which = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 
1L, 1L, 1L, 1L), .Label = c("compress", "normal"), class = "factor")), .Names = c("id", 
"description", "IPR.group", "consistent", "p", "padjust", "metal", 
"perc", "sig", "which"), row.names = c(NA, -10L), class = "data.frame")

test_table
           id                  description IPR.group consistent            p      padjust metal      perc   sig    which
1  GO:0005634                      nucleus         H       TRUE 4.922458e-05 1.663087e-02    zn 0.8417266  TRUE   normal
2  GO:0008270             zinc ion binding         W       TRUE 1.081574e-21 8.526407e-19    zn 0.8318078  TRUE   normal
3  GO:0046914 transition metal ion binding         W       TRUE 2.060498e-14 1.218269e-11    zn 0.5192819  TRUE   normal
4  GO:0005525                  GTP binding        AE       TRUE 1.000000e+00 1.000000e+00    mg 0.8755981 FALSE   normal
5  GO:0019001    guanyl nucleotide binding        AE       TRUE 1.000000e+00 1.000000e+00    mg 0.8766520 FALSE   normal
6  GO:0005634                      nucleus         H       TRUE 1.000000e+00 1.000000e+00    ca 0.0000000 FALSE compress
7  GO:0008270             zinc ion binding         W       TRUE 1.000000e+00 1.000000e+00    ca 0.0000000 FALSE compress
8  GO:0046914 transition metal ion binding         W       TRUE 1.000000e+00 1.000000e+00    ca 0.0000000 FALSE compress
9  GO:0005525                  GTP binding        AE       TRUE 6.514281e-09 9.062514e-07    ca 0.0000000  TRUE compress
10 GO:0019001    guanyl nucleotide binding        AE       TRUE 2.320097e-10 3.919306e-08    ca 0.0000000  TRUE compress

所以,如果我这样做,我可以开始接近:

# Load the tables package, which provides tabular() and Heading().
library(tables)
# Rows: one per level of `id`. Columns: p, padjust, metal, perc and sig, nested
# under each level of `which`. `identity` passes each cell value through
# unchanged, and the empty Heading() keeps "identity" out of the column header.
tabular(id ~ which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)




    which                                                                      
            compress                             normal                                
 id         p         padjust   metal perc sig   p         padjust   metal perc   sig  
 GO:0005525 6.514e-09 9.063e-07 ca    0    TRUE  1.000e+00 1.000e+00 mg    0.8756 FALSE
 GO:0005634 1.000e+00 1.000e+00 ca    0    FALSE 4.922e-05 1.663e-02 zn    0.8417 TRUE 
 GO:0008270 1.000e+00 1.000e+00 ca    0    FALSE 1.082e-21 8.526e-19 zn    0.8318 TRUE 
 GO:0019001 2.320e-10 3.919e-08 ca    0    TRUE  1.000e+00 1.000e+00 mg    0.8767 FALSE
 GO:0046914 1.000e+00 1.000e+00 ca    0    FALSE 2.060e-14 1.218e-11 zn    0.5193 TRUE 

但是,只要我尝试把description列加到我认为它应该在的位置,就开始报错:

# Attempt 1: add the character column `description` as a second row term next
# to `id`. tabular() rejects it with the error quoted below.
tabular((id + description) ~ which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
# Error in term2table(rows[[i]], cols[[j]], data, n) : Duplicate values: description and p

# Attempt 2: the same idea with `IPR.group` as the extra row term; it fails
# with the same kind of error.
tabular((id + IPR.group) ~ which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
# Error in term2table(rows[[i]], cols[[j]], data, n) : Duplicate values: IPR.group and p

即使把description单独放在公式右侧(不放进which分组里),这个字符型的列也会被变成很奇怪的东西(每一行都显示为2):

# Attempt 3: put `description` on the column side of the formula, outside the
# `which` grouping. The call runs, but the description column prints 2 for
# every row instead of the text (see the output that follows).
tabular(id ~ description + which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
                        which                                                                      
                        compress                             normal                                
 id         description p         padjust   metal perc sig   p         padjust   metal perc   sig  
 GO:0005525 2           6.514e-09 9.063e-07 ca    0    TRUE  1.000e+00 1.000e+00 mg    0.8756 FALSE
 GO:0005634 2           1.000e+00 1.000e+00 ca    0    FALSE 4.922e-05 1.663e-02 zn    0.8417 TRUE 
 GO:0008270 2           1.000e+00 1.000e+00 ca    0    FALSE 1.082e-21 8.526e-19 zn    0.8318 TRUE 
 GO:0019001 2           2.320e-10 3.919e-08 ca    0    TRUE  1.000e+00 1.000e+00 mg    0.8767 FALSE
 GO:0046914 2           1.000e+00 1.000e+00 ca    0    FALSE 2.060e-14 1.218e-11 zn    0.5193 TRUE 

如果我新建一列,把想要显示的几列拼接在一起,倒是可以凑合出来,但那样我还得另外写些代码,才能让它们看起来对齐一致:

# Workaround: collapse id, description and IPR.group into one space-separated
# label per row, stored as a factor so tabular() can use it as the single row
# classifier.
test_table$ID <- factor(
  with(test_table, paste(id, description, IPR.group, sep = " "))
)
tabular(
  ID ~ which * (p + padjust + metal + perc + sig) * Heading() * identity,
  data = test_table
)


                                           which                                                                      
                                           compress                             normal                                
 ID                                        p         padjust   metal perc sig   p         padjust   metal perc   sig  
 GO:0005525 GTP binding AE                 6.514e-09 9.063e-07 ca    0    TRUE  1.000e+00 1.000e+00 mg    0.8756 FALSE
 GO:0005634 nucleus H                      1.000e+00 1.000e+00 ca    0    FALSE 4.922e-05 1.663e-02 zn    0.8417 TRUE 
 GO:0008270 zinc ion binding W             1.000e+00 1.000e+00 ca    0    FALSE 1.082e-21 8.526e-19 zn    0.8318 TRUE 
 GO:0019001 guanyl nucleotide binding AE   2.320e-10 3.919e-08 ca    0    TRUE  1.000e+00 1.000e+00 mg    0.8767 FALSE
 GO:0046914 transition metal ion binding W 1.000e+00 1.000e+00 ca    0    FALSE 2.060e-14 1.218e-11 zn    0.5193 TRUE 

我觉得上面这些做法中应该有一种稍加调整就能解决问题,但实际上都不太行。任何帮助都将不胜感激。此外,解决方案还应去掉表头中显示在compress和normal上方的which。

1 个答案:

答案 0 :(得分:1)

这似乎很接近,至少:

# Answer's first approach: move `description` inside the `which` grouping so it
# is passed through `identity` like the other columns; the leading Heading()
# removes the "which" label from the header.
> tabular(id ~ Heading()*which*(description + p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)

            compress                                                         
 id         description                  p         padjust   metal perc sig  
 GO:0005525 GTP binding                  6.514e-09 9.063e-07 ca    0    TRUE 
 GO:0005634 nucleus                      1.000e+00 1.000e+00 ca    0    FALSE
 GO:0008270 zinc ion binding             1.000e+00 1.000e+00 ca    0    FALSE
 GO:0019001 guanyl nucleotide binding    2.320e-10 3.919e-08 ca    0    TRUE 
 GO:0046914 transition metal ion binding 1.000e+00 1.000e+00 ca    0    FALSE

 normal                                                             
 description                  p         padjust   metal perc   sig  
 GTP binding                  1.000e+00 1.000e+00 mg    0.8756 FALSE
 nucleus                      4.922e-05 1.663e-02 zn    0.8417 TRUE 
 zinc ion binding             1.082e-21 8.526e-19 zn    0.8318 TRUE 
 guanyl nucleotide binding    1.000e+00 1.000e+00 mg    0.8767 FALSE
 transition metal ion binding 2.060e-14 1.218e-11 zn    0.5193 TRUE 

...但您可能不太满意description列在每个which组中都重复出现。也许可以通过把description项移到括号外面来解决这个问题,但这似乎还需要别的“神奇咒语”,因为简单直接的改法似乎都会报错,提示与p存在重复值(Duplicate values)。

编辑:已经很接近那个神奇的咒语了......

tabular(id ~ (description*Heading()*min)+Heading()*which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)

这看起来不太对(也许?)。问题似乎在于tabular确实要对description应用一个汇总函数。在这种情况下,我认为作为“虚拟”汇总函数,unique()可能是比min()更好的选择,而且得到的结果似乎相同。

编辑:最新改进......

min()