能否让 ggtree 中的 geom_text 标签与节点保持一致的距离,而不是参差不齐?

时间:2018-01-10 20:21:00

标签: r ggplot2 ggtree

我已经对R中的一些氨基酸序列进行了比对,并导入了距离矩阵(dist_mat),以便使用tree <- ape::nj(dist_mat)在ggtree中使用。它看起来像这样:

    node parent branch.length          x         y     label isTip      branch    angle
1     1     14   0.000000000 0.00000000  3.000000  GAS05134  TRUE 0.000000000  90.0000
2     2     13   0.000000000 0.00000000  2.000000  GAS12252  TRUE 0.000000000  60.0000
3     3     13   0.000000000 0.00000000  1.000000  GAS12271  TRUE 0.000000000  30.0000
4     4     15   0.004565217 0.02000000  4.000000  GAS06216  TRUE 0.017717391 120.0000
5     5     18   0.060110914 0.85012362  7.000000 GAS131472  TRUE 0.820068164 210.0000
6     6     19   0.000000000 0.84990179  8.000000  GAS13399  TRUE 0.849901793 240.0000
7     7     19   0.000000000 0.84990179  9.000000  GAS11282  TRUE 0.849901793 270.0000
8     8     21   0.000000000 0.92485325 11.000000  GAS03101  TRUE 0.924853253 330.0000
9     9     21   0.000000000 0.92485325 12.000000   GAS0354  TRUE 0.924853253 360.0000
10   10     20   0.000000000 0.92485325 10.000000  GAS09426  TRUE 0.924853253 300.0000
11   11     22   0.000000000 0.91032609  5.000000  14GA0305  TRUE 0.910326087 150.0000
12   12     22   0.000000000 0.91032609  6.000000  14GA0286  TRUE 0.910326087 180.0000
13   13     13   0.000000000 0.00000000  2.447917      <NA> FALSE 0.000000000  73.4375
14   14     13   0.000000000 0.00000000  4.343750      <NA> FALSE 0.000000000 130.3125
15   15     14   0.015434783 0.01543478  5.687500      <NA> FALSE 0.007717391 170.6250
16   16     15   0.454136361 0.46957114  7.375000      <NA> FALSE 0.242502963 221.2500
17   17     16   0.031992271 0.50156341  9.250000      <NA> FALSE 0.485567279 277.5000
18   18     17   0.288449292 0.79001271  7.750000      <NA> FALSE 0.645788061 232.5000
19   19     18   0.059889086 0.84990179  8.500000      <NA> FALSE 0.819957250 255.0000
20   20     17   0.423289838 0.92485325 10.750000      <NA> FALSE 0.713208334 322.5000
21   21     20   0.000000000 0.92485325 11.500000      <NA> FALSE 0.924853253 345.0000
22   22     16   0.440754944 0.91032609  5.500000      <NA> FALSE 0.689948615 165.0000

gg_tree中的基本表示如下所示:

> gg_tree <- ggtree(size=0.2,tree, layout = "circular", branch.length = "none") + geom_tiplab2(color='blue', size=3) 

enter image description here

然后我把原始数据框中的数据附加到树上,以便将其用于美学映射(aesthetics):

> gg_tree <- gg_tree %<+% DF
> head(DF, 12)
# A tibble: 12 x 4
   id        emm      tee     `50aa_HVR_peptide`                                
   <chr>     <chr>    <chr>   <chr>                                             
 1 GAS05134  emm1.0   tee1    NGDGNPREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA
 2 GAS12252  emm1.0   tee1    NGDGNPREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA
 3 GAS12271  emm1.0   tee1    NGDGNPREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA
 4 GAS06216  emm1.19  tee1    NGDGNLREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA
 5 GAS131472 emm100.0 tee28.1 RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSDLKTENSKLT
 6 GAS13399  emm100.5 tee28.1 RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSKLTSEKEELT
 7 GAS11282  emm100.5 tee28.1 RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSKLTSEKEELT
 8 GAS03101  emm101.0 tee14.2 ADHPSYTAAKDEVLSKFSVPGHVWAHEREKNDKLSSENEGLKAGLQEKEQ
 9 GAS0354   emm101.0 tee14.2 ADHPSYTAAKDEVLSKFSVPGHVWAHEREKNDKLSSENEGLKAGLQEKEQ
10 GAS09426  emm101.0 tee14.2 ADHPSYTAAKDEVLSKFSVPGHVWAHEREKNDKLSSENEGLKAGLQEKEQ
11 14GA0305  emm103.0 tee8    DSPRDVTSDLTTSMWKKKAEEAEAKASKFEKQLEDYKKAQKDYYEIEEKL
12 14GA0286  emm103.0 tee8    DSPRDVTSDLTTSMWKKKAEEAEAKASKFEKQLEDYKKAQKDYYEIEEKL

然后我想把 "tee" 列中的信息作为树外侧的一列彩色文本添加上去,我的尝试如下:

> gg_tree + geom_text(size = 3,aes(angle=angle, color=tee, label=tee), hjust=-2)+
  theme(legend.position="right")

enter image description here

正如你所看到的,我试图使用 "hjust" 让 "geom_text" 图层不与 "geom_tiplab2" 图层重叠,但各个 "tee" 文本到尖端标签的距离似乎相差很大。

有人能建议一下,如何让 "tee" 文本在树的外侧、尖端标签之外整齐均匀地排列吗?注意:这种情况在矩形树上也会出现,而不仅仅是圆形树。

> sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ggtree_1.10.2       treeio_1.2.1        ggplot2_2.2.1       readxl_1.0.0       
 [5] readr_1.1.1         DECIPHER_2.6.0      RSQLite_2.0         Biostrings_2.46.0  
 [9] XVector_0.18.0      IRanges_2.12.0      S4Vectors_0.16.0    BiocGenerics_0.24.0

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.14     pillar_1.0.1     compiler_3.4.3   cellranger_1.1.0 plyr_1.8.4      
 [6] tools_3.4.3      zlibbioc_1.24.0  digest_0.6.13    bit_1.1-12       jsonlite_1.5    
[11] memoise_1.1.0    tibble_1.4.1     gtable_0.2.0     nlme_3.1-131     lattice_0.20-35 
[16] pkgconfig_2.0.1  rlang_0.1.6      cli_1.0.0        rstudioapi_0.7   DBI_0.7         
[21] rvcheck_0.0.9    hms_0.4.0        bit64_0.9-7      grid_3.4.3       glue_1.2.0      
[26] R6_2.2.2         purrr_0.2.4      tidyr_0.7.2      blob_1.1.0       magrittr_1.5    
[31] scales_0.5.0     assertthat_0.2.0 colorspace_1.3-2 ape_5.0          labeling_0.3    
[36] utf8_1.1.3       lazyeval_0.2.1   munsell_0.4.3    crayon_1.3.4 

1 个答案:

答案 0 :(得分:1)

hjust 和 vjust 在 coord_polar 下效果不佳。让 geom_text 标签远离中心的一个技巧,是给 x 坐标加上一个值:

library(ggtree)
gg_tree + geom_text(size = 3, aes(angle = angle,
                                  color = tee,
                                  label = tee,
                                  x = x + 0.4), hjust = 0)+
  theme(legend.position = "right")

enter image description here

安装 ggtree:

source("https://bioconductor.org/biocLite.R")
biocLite("ggtree")

使用过的数据:

> dput(DF)
structure(list(id = structure(c(5L, 9L, 10L, 6L, 11L, 12L, 8L, 
3L, 4L, 7L, 2L, 1L), .Label = c("14GA0286", "14GA0305", "GAS03101", 
"GAS0354", "GAS05134", "GAS06216", "GAS09426", "GAS11282", "GAS12252", 
"GAS12271", "GAS131472", "GAS13399"), class = "factor"), emm = structure(c(1L, 
1L, 1L, 2L, 3L, 4L, 4L, 5L, 5L, 5L, 6L, 6L), .Label = c("emm1.0", 
"emm1.19", "emm100.0", "emm100.5", "emm101.0", "emm103.0"), class = "factor"), 
    tee = structure(c(1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 
    4L, 4L), .Label = c("tee1", "tee14.2", "tee28.1", "tee8"), class = "factor"), 
    X.50aa_HVR_peptide. = structure(c(4L, 4L, 4L, 3L, 5L, 6L, 
    6L, 1L, 1L, 1L, 2L, 2L), .Label = c("ADHPSYTAAKDEVLSKFSVPGHVWAHEREKNDKLSSENEGLKAGLQEKEQ", 
    "DSPRDVTSDLTTSMWKKKAEEAEAKASKFEKQLEDYKKAQKDYYEIEEKL", "NGDGNLREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA", 
    "NGDGNPREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA", "RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSDLKTENSKLT", 
    "RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSKLTSEKEELT"), class = "factor")), .Names = c("id", 
"emm", "tee", "X.50aa_HVR_peptide."), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))

> dput(tree)
structure(list(node = 1:22, parent = c(14L, 13L, 13L, 15L, 18L, 
19L, 19L, 21L, 21L, 20L, 22L, 22L, 13L, 13L, 14L, 15L, 16L, 17L, 
18L, 17L, 20L, 16L), branch.length = c(0, 0, 0, 0.004565217, 
0.060110914, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015434783, 0.454136361, 
0.031992271, 0.288449292, 0.059889086, 0.423289838, 0, 0.440754944
), x = c(0, 0, 0, 0.02, 0.85012362, 0.84990179, 0.84990179, 0.92485325, 
0.92485325, 0.92485325, 0.91032609, 0.91032609, 0, 0, 0.01543478, 
0.46957114, 0.50156341, 0.79001271, 0.84990179, 0.92485325, 0.92485325, 
0.91032609), y = c(3, 2, 1, 4, 7, 8, 9, 11, 12, 10, 5, 6, 2.447917, 
4.34375, 5.6875, 7.375, 9.25, 7.75, 8.5, 10.75, 11.5, 5.5), label = structure(c(6L, 
10L, 11L, 7L, 12L, 13L, 9L, 4L, 5L, 8L, 3L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("<NA>", "14GA0286", "14GA0305", 
"GAS03101", "GAS0354", "GAS05134", "GAS06216", "GAS09426", "GAS11282", 
"GAS12252", "GAS12271", "GAS131472", "GAS13399"), class = "factor"), 
    isTip = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
    TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE), branch = c(0, 0, 0, 0.017717391, 
    0.820068164, 0.849901793, 0.849901793, 0.924853253, 0.924853253, 
    0.924853253, 0.910326087, 0.910326087, 0, 0, 0.007717391, 
    0.242502963, 0.485567279, 0.645788061, 0.81995725, 0.713208334, 
    0.924853253, 0.689948615), angle = c(90, 60, 30, 120, 210, 
    240, 270, 330, 360, 300, 150, 180, 73.4375, 130.3125, 170.625, 
    221.25, 277.5, 232.5, 255, 322.5, 345, 165)), .Names = c("node", 
"parent", "branch.length", "x", "y", "label", "isTip", "branch", 
"angle"), class = "data.frame", row.names = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", "22"))