我有一个 data.frame,想用 tables::tabular() 把它整理成适合在 LaTeX 中漂亮排版的表格。它包含 5 个条目,在两个组(normal 和 compress)中各重复一次;其中有三列我想用作行的分组依据,其余各列则按组别分组。
test_table <- structure(list(id = structure(c(2L, 3L, 5L, 1L, 4L, 2L, 3L, 5L,
1L, 4L), .Label = c("GO:0005525", "GO:0005634", "GO:0008270",
"GO:0019001", "GO:0046914"), class = "factor"), description = c("nucleus",
"zinc ion binding", "transition metal ion binding", "GTP binding",
"guanyl nucleotide binding", "nucleus", "zinc ion binding", "transition metal ion binding",
"GTP binding", "guanyl nucleotide binding"), IPR.group = c("H",
"W", "W", "AE", "AE", "H", "W", "W", "AE", "AE"), consistent = c(TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), p = c(4.92245771293119e-05,
1.08157386873641e-21, 2.06049782601929e-14, 0.999999999562468,
0.999999999985399, 1, 1, 0.999999999999996, 6.51428091733489e-09,
2.3200965815753e-10), padjust = c(0.0166308749872604, 8.52640733187206e-19,
1.2182693396339e-11, 1, 1, 1, 1, 1, 9.06251433499824e-07, 3.91930601101827e-08
), metal = c("zn", "zn", "zn", "mg", "mg", "ca", "ca", "ca",
"ca", "ca"), perc = c(0.841726618705036, 0.831807780320366, 0.519281914893617,
0.875598086124402, 0.876651982378855, 0, 0, 0, 0, 0), sig = c("TRUE",
"TRUE", "TRUE", "FALSE", "FALSE", "FALSE", "FALSE", "FALSE",
"TRUE", "TRUE"), which = structure(c(2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L), .Label = c("compress", "normal"), class = "factor")), .Names = c("id",
"description", "IPR.group", "consistent", "p", "padjust", "metal",
"perc", "sig", "which"), row.names = c(NA, -10L), class = "data.frame")
test_table
id description IPR.group consistent p padjust metal perc sig which
1 GO:0005634 nucleus H TRUE 4.922458e-05 1.663087e-02 zn 0.8417266 TRUE normal
2 GO:0008270 zinc ion binding W TRUE 1.081574e-21 8.526407e-19 zn 0.8318078 TRUE normal
3 GO:0046914 transition metal ion binding W TRUE 2.060498e-14 1.218269e-11 zn 0.5192819 TRUE normal
4 GO:0005525 GTP binding AE TRUE 1.000000e+00 1.000000e+00 mg 0.8755981 FALSE normal
5 GO:0019001 guanyl nucleotide binding AE TRUE 1.000000e+00 1.000000e+00 mg 0.8766520 FALSE normal
6 GO:0005634 nucleus H TRUE 1.000000e+00 1.000000e+00 ca 0.0000000 FALSE compress
7 GO:0008270 zinc ion binding W TRUE 1.000000e+00 1.000000e+00 ca 0.0000000 FALSE compress
8 GO:0046914 transition metal ion binding W TRUE 1.000000e+00 1.000000e+00 ca 0.0000000 FALSE compress
9 GO:0005525 GTP binding AE TRUE 6.514281e-09 9.062514e-07 ca 0.0000000 TRUE compress
10 GO:0019001 guanyl nucleotide binding AE TRUE 2.320097e-10 3.919306e-08 ca 0.0000000 TRUE compress
所以,如果我这样做,我可以开始接近:
library(tables)
tabular(id ~ which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
which
compress normal
id p padjust metal perc sig p padjust metal perc sig
GO:0005525 6.514e-09 9.063e-07 ca 0 TRUE 1.000e+00 1.000e+00 mg 0.8756 FALSE
GO:0005634 1.000e+00 1.000e+00 ca 0 FALSE 4.922e-05 1.663e-02 zn 0.8417 TRUE
GO:0008270 1.000e+00 1.000e+00 ca 0 FALSE 1.082e-21 8.526e-19 zn 0.8318 TRUE
GO:0019001 2.320e-10 3.919e-08 ca 0 TRUE 1.000e+00 1.000e+00 mg 0.8767 FALSE
GO:0046914 1.000e+00 1.000e+00 ca 0 FALSE 2.060e-14 1.218e-11 zn 0.5193 TRUE
但是,只要我尝试把 description 列加到我认为它应该在的任何位置,就会开始报错:
tabular((id + description) ~ which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
# Error in term2table(rows[[i]], cols[[j]], data, n) : Duplicate values: description and p
tabular((id + IPR.group) ~ which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
# Error in term2table(rows[[i]], cols[[j]], data, n) : Duplicate values: IPR.group and p
即使把 description 放在公式右侧(自变量一侧),字符型的值也会变成很奇怪的东西:
tabular(id ~ description + which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
which
compress normal
id description p padjust metal perc sig p padjust metal perc sig
GO:0005525 2 6.514e-09 9.063e-07 ca 0 TRUE 1.000e+00 1.000e+00 mg 0.8756 FALSE
GO:0005634 2 1.000e+00 1.000e+00 ca 0 FALSE 4.922e-05 1.663e-02 zn 0.8417 TRUE
GO:0008270 2 1.000e+00 1.000e+00 ca 0 FALSE 1.082e-21 8.526e-19 zn 0.8318 TRUE
GO:0019001 2 2.320e-10 3.919e-08 ca 0 TRUE 1.000e+00 1.000e+00 mg 0.8767 FALSE
GO:0046914 2 1.000e+00 1.000e+00 ca 0 FALSE 2.060e-14 1.218e-11 zn 0.5193 TRUE
如果我新建一列,把想要显示的几列拼接在一起,就能得到结果,但我还得另外写些代码才能让它们看起来整齐一致:
test_table$ID <- paste0(test_table$id, " ", test_table$description, " ", test_table$IPR.group)
test_table$ID <- factor(test_table$ID)
tabular(ID ~ which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
which
compress normal
ID p padjust metal perc sig p padjust metal perc sig
GO:0005525 GTP binding AE 6.514e-09 9.063e-07 ca 0 TRUE 1.000e+00 1.000e+00 mg 0.8756 FALSE
GO:0005634 nucleus H 1.000e+00 1.000e+00 ca 0 FALSE 4.922e-05 1.663e-02 zn 0.8417 TRUE
GO:0008270 zinc ion binding W 1.000e+00 1.000e+00 ca 0 FALSE 1.082e-21 8.526e-19 zn 0.8318 TRUE
GO:0019001 guanyl nucleotide binding AE 2.320e-10 3.919e-08 ca 0 TRUE 1.000e+00 1.000e+00 mg 0.8767 FALSE
GO:0046914 transition metal ion binding W 1.000e+00 1.000e+00 ca 0 FALSE 2.060e-14 1.218e-11 zn 0.5193 TRUE
我原以为可以套用上面某一种做法,但都不太行得通。任何帮助,我都将不胜感激。此外,任何解决方案还应去掉表格标题中显示在 compress 和 normal 上方的 which。
答案 0 :(得分:1)
这似乎很接近,至少:
> tabular(id ~ Heading()*which*(description + p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
compress
id description p padjust metal perc sig
GO:0005525 GTP binding 6.514e-09 9.063e-07 ca 0 TRUE
GO:0005634 nucleus 1.000e+00 1.000e+00 ca 0 FALSE
GO:0008270 zinc ion binding 1.000e+00 1.000e+00 ca 0 FALSE
GO:0019001 guanyl nucleotide binding 2.320e-10 3.919e-08 ca 0 TRUE
GO:0046914 transition metal ion binding 1.000e+00 1.000e+00 ca 0 FALSE
normal
description p padjust metal perc sig
GTP binding 1.000e+00 1.000e+00 mg 0.8756 FALSE
nucleus 4.922e-05 1.663e-02 zn 0.8417 TRUE
zinc ion binding 1.082e-21 8.526e-19 zn 0.8318 TRUE
guanyl nucleotide binding 1.000e+00 1.000e+00 mg 0.8767 FALSE
transition metal ion binding 2.060e-14 1.218e-11 zn 0.5193 TRUE
...但您可能不满意 description 列在每个 which 组中重复出现。也许可以通过把 description 项移到括号之外来解决这个问题,但看起来这还需要另外一些神奇的咒语,因为直接这样改似乎会报 description 与 p 的重复值(Duplicate values)错误。
编辑:如此贴近神奇的咒语......
tabular(id ~ (description*Heading()*min)+Heading()*which*(p + padjust + metal + perc + sig)*Heading()*identity, data = test_table)
这看起来不对(也许?)。问题似乎是 tabular 确实要对 description 应用一个汇总函数。在这种情况下,我认为 unique() 可能是比 min() 更好的“虚拟”汇总函数,并且似乎给出了相同的结果。
编辑:最新改进......
min()