你如何让geom_map显示地图的所有部分?

时间:2014-04-04 06:31:06

标签: r plot ggplot2

我刚开始使用ggplot2中的geom_map函数。在阅读了我能找到的29篇关于geom_map的帖子后,我仍然遇到同样的问题。

我的数据框(data.frame)非常大,包含超过2000行。它基本上是由世界卫生组织汇编的某个特定基因(TP53)的数据。

请从here下载。

数据的前几行(head)如下所示:

> head(ARCTP53_SOExample)
  Mutation_ID MUT_ID hg18_Chr17_coordinates hg19_Chr17_coordinates ExonIntron Genomic_nt Codon_number
1          16   1789                7519192                7578467     5-exon      12451          155
2          13   1741                7519200                7578475     5-exon      12443          152
3          17   2143                7519131                7578406     5-exon      12512          175
4          14   2143                7519131                7578406     5-exon      12512          175
5          15   2168                7519128                7578403     5-exon      12515          176
6          12   3737                7517845                7577120     8-exon      13798          273
  Description c_description g_description       g_description_hg18 WT_nucleotide Mutant_nucleotide
1         A>G      c.463A>G  g.7578467T>C NC_000017.9:g.7519192T>C           A                   G
2         C>T      c.455C>T  g.7578475G>A NC_000017.9:g.7519200G>A           C                   T
3         G>A      c.524G>A  g.7578406C>T NC_000017.9:g.7519131C>T           G                   A
4         G>A      c.524G>A  g.7578406C>T NC_000017.9:g.7519131C>T           G                   A
5         G>T      c.527G>T  g.7578403C>A NC_000017.9:g.7519128C>A           G                   T
6         G>A      c.818G>A  g.7577120C>T NC_000017.9:g.7517845C>T           G                   A
  Splice_site CpG_site           Type Mut_rate WT_codon Mutant_codon WT_AA Mutant_AA ProtDescription
1          no       no        A:T>G:C    0.170      ACC          GCC   Thr       Ala         p.T155A
2          no      yes G:C>A:T at CpG    1.243      CCG          CTG   Pro       Leu         p.P152L
3          no      yes G:C>A:T at CpG    1.280      CGC          CAC   Arg       His         p.R175H
4          no      yes G:C>A:T at CpG    1.280      CGC          CAC   Arg       His         p.R175H
5          no       no        G:C>T:A    0.054      TGC          TTC   Cys       Phe         p.C176F
6          no      yes G:C>A:T at CpG    1.335      CGT          CAT   Arg       His         p.R273H
  Mut_rateAA   Effect Structural_motif Putative_stop Sample_Name Sample_ID Sample_source Tumor_origin Grade
1      0.170 missense NDBL/beta-sheets             0    CAS91-19        17       surgery      primary      
2      1.243 missense NDBL/beta-sheets             0     CAS91-4        14       surgery      primary      
3      1.280 missense            L2/L3             0    CAS91-13        12       surgery      primary      
4      1.280 missense            L2/L3             0     CAS91-5        15       surgery      primary      
5      0.054 missense            L2/L3             0     CAS91-1        16       surgery      primary      
6      1.335 missense          L1/S/H2             0     CAS91-3        13       surgery      primary      
  Stage TNM p53_IHC KRAS_status Other_mutations Other_associations
1              <NA>        <NA>            <NA>                   
2              <NA>        <NA>            <NA>                   
3              <NA>        <NA>            <NA>                   
4              <NA>        <NA>            <NA>                   
5              <NA>        <NA>            <NA>                   
6              <NA>        <NA>            <NA>                   
                                                                 Add_Info Individual_ID  Sex Age Ethnicity
1 Mutation only present in adjacent dysplastic area (Barrett's esophagus)            17 <NA>  NA          
2 Mutation only present in adjacent dysplastic area (Barrett's esophagus)            14 <NA>  NA          
3 Mutation only present in adjacent dysplastic area (Barrett's esophagus)            12 <NA>  NA          
4 Mutation only present in adjacent dysplastic area (Barrett's esophagus)            15 <NA>  NA          
5                                                                                    16 <NA>  NA          
6      Mutation absent from adjacent dysplasia area (Barrett's esophagus)            13 <NA>  NA          
  Geo_area Country            Development       Population   Region TP53polymorphism Germline_mutation
1              USA More developed regions Northern America Americas                                 NA
2              USA More developed regions Northern America Americas                                 NA
3              USA More developed regions Northern America Americas                                 NA
4              USA More developed regions Northern America Americas                                 NA
5              USA More developed regions Northern America Americas                                 NA
6              USA More developed regions Northern America Americas                                 NA
  Family_history Tobacco Alcohol Exposure Infectious_agent Ref_ID Cross_Ref_ID  PubMed Exclude_analysis
1                   <NA>    <NA>     <NA>             <NA>      4           NA 1868473            False
2                   <NA>    <NA>     <NA>             <NA>      4           NA 1868473            False
3                   <NA>    <NA>     <NA>             <NA>      4           NA 1868473            False
4                   <NA>    <NA>     <NA>             <NA>      4           NA 1868473            False
5                   <NA>    <NA>     <NA>             <NA>      4           NA 1868473            False
6                   <NA>    <NA>     <NA>             <NA>      4           NA 1868473            False
  WGS_WXS
1      No
2      No
3      No
4      No
5      No
6      No

无论如何,我想创建一个简单的世界地图,为已经研究过这种突变的国家/地区着色,并用颜色反映来自这些国家的"突变特征"(mutation signatures)是多还是少。

如果您看到这一点,您可能会更好地理解我想要做的事情:

summary(ARCTP53_SOExample$Country)
              Australia                  Brazil                  Canada                   China 
                      1                     127                      76                     519 
       China, Hong-Kong Chinese Taipei (Taiwan)          Czech Republic                   Egypt 
                     52                      36                       9                       9 
                 France                 Germany                   India                    Iran 
                    195                      10                      63                     112 
                Ireland                   Italy                   Japan                   Kenya 
                     25                      30                     414                      11 
           South Africa                   Spain             Switzerland                Thailand 
                     13                       2                      24                      35 
        The Netherlands                      UK                 Uruguay                     USA 
                      6                      17                       6                     189 
                   NA's 
                     30 

所以有些国家/地区在data.frame中多次出现。

所以这就是我为了得到我想要的地图而做的事情:

library(ggplot2)
library(maps)

# Country outlines from the maps package, converted to a data frame.
world_map <- map_data("world")

# Single map layer whose map_id values come from ARCTP53_SOExample$Country,
# so only countries that occur in that column are drawn and filled.
ggplot(ARCTP53_SOExample) +
  geom_map(
    map = world_map,
    aes(map_id = Country, fill = Country),
    colour = "black"
  ) +
  expand_limits(x = world_map$long, y = world_map$lat)

这是我得到的结果:这张地图只包含我列表中的国家……

有没有人对我做错了什么有任何意见?

此外,我接下来想做的是按ExonIntron列为不同的国家/地区添加geom_bar()条形图。不过,我想先把地图本身画对。

非常感谢。

1 个答案:

答案 0 :(得分:11)

ARC…数据框中缺少的国家/地区(也就是地图上缺少的区域)可以通过用world_map数据框构建一个基础图层来补全:

library(ggplot2)
library(maps)

world_map <- map_data("world")

# Two layers are needed:
#   1. a base layer fed by world_map itself, so every region is drawn
#      (in white) even when it never occurs in ARCTP53_SOExample;
#   2. the layer we really want, filled per Country from the plot data.
gg <- ggplot(ARCTP53_SOExample) +
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", color = "black"
  ) +
  geom_map(
    map = world_map,
    aes(map_id = Country, fill = Country),
    colour = "black"
  ) +
  expand_limits(x = world_map$long, y = world_map$lat) +
  theme(legend.position = "none")
gg

map1

我删除了图例,因为使用等值区域图(choropleth)时,通常假设读者了解地理。

注意:每个区域(国家/地区)使用不同的颜色确实不是一个好主意。既然您真的只想突出研究突变的位置,那么单一颜色就足够了:

# Same two-layer map, but every country present in the data gets one
# constant highlight colour instead of a per-country fill.
gg <- ggplot(ARCTP53_SOExample) +
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", color = "black"
  ) +
  geom_map(
    map = world_map,
    aes(map_id = Country),
    fill = "steelblue", colour = "black"
  ) +
  expand_limits(x = world_map$long, y = world_map$lat) +
  theme(legend.position = "none")
gg

map2

由于您最终想要讲述关于ExonIntron的故事,您可能需要考虑把它作为等值区域图的填充颜色。我对基因一无所知,所以不知道用渐变色是否有意义,或者离散的颜色是否更合适。下面的代码会产生大量不同的颜色,这让我觉得您可能需要为intron使用一个渐变色标,为exon使用另一个渐变色标。再说一遍,我不是研究基因的人。

# Two-layer map with the data layer filled by ExonIntron; the legend is
# left in place here so the many distinct fill values can be inspected.
gg <- ggplot(ARCTP53_SOExample) +
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", color = "black"
  ) +
  geom_map(
    map = world_map,
    aes(map_id = Country, fill = ExonIntron),
    colour = "black"
  ) +
  expand_limits(x = world_map$long, y = world_map$lat)
gg

map3

某些颜色位于非常小的区域,或者位于名称与world_map$region中的名称不匹配的区域。你可能需要检查一下,像这样:

# Compare the country names used in the data with the region names known
# to the map; FALSE marks a name that has no counterpart in world_map.
wm.reg <- unique(as.character(world_map$region))
arc.reg <- unique(as.character(ARCTP53_SOExample$Country))

arc.reg %in% wm.reg
##  [1]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [14]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE

# List the unmatched names themselves (missing values excluded) so they can
# be recoded to the spelling used in world_map$region.
arc.reg[!is.na(arc.reg) & !(arc.reg %in% wm.reg)]

这说明有些国家/地区的名称没有匹配上。

如果您打算直接使用图例,而不是自己另外构建一个结果表,您可能还需要考虑换一种方式摆放图例(例如将其置于底部)。

更新

我差点忘了。由于你(很可能)不需要南极洲,你应该摆脱它,因为它占用了相当多的宝贵空间:

# Drop Antarctica from the map data before plotting.
world_map <- subset(world_map, region != "Antarctica")

# Two-layer map filled by ExonIntron, with the legend suppressed.
gg <- ggplot(ARCTP53_SOExample) +
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", color = "black"
  ) +
  geom_map(
    map = world_map,
    aes(map_id = Country, fill = ExonIntron),
    colour = "black"
  ) +
  expand_limits(x = world_map$long, y = world_map$lat) +
  theme(legend.position = "none")
gg

map4

(注意:我去掉了图例,因为我真的认为您应该重新考虑希望在地图上用颜色表达什么,然后用额外的表格或图表来充当图例)


最终更新(应提问者在下方评论中的请求)

library(ggplot2)
library(maps)
library(plyr)
library(gridExtra)

ARCTP53_SOExample <- read.csv("dat.csv")

# Reduce all the distinct exon/intron labels to just "exon" or "intron".
# Every value that does not contain "exon" is labelled "intron"; this
# includes NA, because grepl() returns FALSE for missing values.
ARCTP53_SOExample$EorI <- factor(
  ifelse(
    grepl("exon", ARCTP53_SOExample$ExonIntron, ignore.case = TRUE),
    "exon", "intron"
  )
)

# Extract summary data for the two variables we care about for the map:
# one row per (Country, EorI) pair with its row count. plyr::count() is
# called by its qualified name so a count() from another attached package
# cannot mask it.
arc.combined <- plyr::count(ARCTP53_SOExample, .(Country, EorI))
colnames(arc.combined) <- c("region", "EorI", "ei.ct")

# Get the total per country (region), i.e. the ei.ct-weighted count, and
# add it to the summary info; merge() joins on the shared "region" column.
arc.combined <- merge(
  arc.combined,
  plyr::count(arc.combined, .(region), wt_var = .(ei.ct))
)
colnames(arc.combined) <- c("region", "EorI", "ei.ct", "region.total")

# It wasn't specified if "EorI" is going to be used on the map, so
# we won't use it below (but we could, now).

# Get the map and drop Antarctica.
world_map <- map_data("world")
world_map <- subset(world_map, region != "Antarctica")

# Show the counts by country with all of the "chart junk" removed,
# the counts scaled as a gradient, and the legend at the top.
# `size` sets the outline width; ggplot2 >= 3.4.0 names this `linewidth`.
gg <- ggplot(arc.combined) +
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", color = "#7f7f7f", size = 0.25
  ) +
  geom_map(
    map = world_map,
    aes(map_id = region, fill = region.total),
    size = 0.25
  ) +
  scale_fill_gradient(
    low = "#fff7bc", high = "#cc4c02", name = "Tumor counts"
  ) +
  expand_limits(x = world_map$long, y = world_map$lat) +
  labs(x = "", y = "", title = "Tumor contribution by country") +
  theme(
    panel.grid = element_blank(), panel.border = element_blank(),
    axis.ticks = element_blank(), axis.text = element_blank(),
    legend.position = "top"
  )
gg

mapb

# BUT you might want to show the counts by intron/exon by country,
# SO we build a separate map for each factor level and combine them
# with some grid magic. Keeping two plot objects still allows each
# choropleth to be tweaked on its own afterwards.

# Build one choropleth of ei.ct for a single EorI level.
#   counts   - summary data frame with columns region, EorI and ei.ct
#   map_df   - map data frame used for both the base and the data layer
#   ei_level - value of EorI to keep ("exon" or "intron")
#   low,high - end colours of the fill gradient
# Returns the ggplot object without printing it.
make_ei_map <- function(counts, map_df, ei_level, low, high) {
  ggplot(counts[counts$EorI == ei_level, ]) +
    # base layer: every region in white so unmatched countries still show
    geom_map(
      data = map_df, map = map_df,
      aes(map_id = region),
      fill = "white", color = "#7f7f7f", size = 0.25
    ) +
    # data layer: outlined in the same grey as the base layer for both
    # levels, so the exon and intron maps are drawn consistently.
    # `size` sets the outline width; ggplot2 >= 3.4.0 names it `linewidth`.
    geom_map(
      map = map_df,
      aes(map_id = region, fill = ei.ct),
      colour = "#7f7f7f", size = 0.25
    ) +
    scale_fill_gradient(low = low, high = high, name = "Tumor counts") +
    expand_limits(x = map_df$long, y = map_df$lat) +
    labs(
      x = "", y = "",
      title = paste0("Tumor contribution by '", ei_level, "' & country")
    ) +
    theme(
      panel.grid = element_blank(), panel.border = element_blank(),
      axis.ticks = element_blank(), axis.text = element_blank(),
      legend.position = "top"
    )
}

# exon

gg.exon <- make_ei_map(
  arc.combined, world_map, "exon",
  low = "#f7fcb9", high = "#238443"
)

# intron

gg.intron <- make_ei_map(
  arc.combined, world_map, "intron",
  low = "#ece7f2", high = "#0570b0"
)

# use some grid magic to combine them into one plot

grid.arrange(gg.exon, gg.intron, ncol = 1)

mapb