手动将图例添加到ggplot2

时间:2018-09-20 08:04:23

标签: r ggplot2 legend

我想我还没有完全弄明白 ggplot2 的图例是如何工作的：我只有下面这几行代码，却始终无法让它显示出图例。

这是数据集:

# Example data: 19 rows, one per five-year window (`quinquennio`), holding the
# integer counts `primo_anno` and `quinto_anno`, a two-line version of the
# window name for axis labels (`quinquennio_ok`) and pre-formatted "k" labels
# for both counts.
# NOTE(review): this looks like dput() output of a tibble read with readr
# (tbl_df class plus a "spec"/col_spec attribute) -- confirm if it matters.
dati <-
structure(list(quinquennio = c("1995-2000", "1996-2001", "1997-2002", 
"1998-2003", "1999-2004", "2000-2005", "2001-2006", "2002-2007", 
"2003-2008", "2004-2009", "2005-2010", "2006-2011", "2007-2012", 
"2008-2013", "2009-2014", "2010-2015", "2011-2016", "2012-2017", 
"2013-2018"), primo_anno = c(588402L, 586231L, 576434L, 562444L, 
585496L, 585351L, 593010L, 617309L, 620897L, 613388L, 616645L, 
627166L, 618343L, 604995L, 597915L, 598747L, 614302L, 610468L, 
612675L), quinto_anno = c(372728L, 380211L, 387806L, 393974L, 
401984L, 394144L, 396725L, 413596L, 417736L, 424143L, 426651L, 
431424L, 427015L, 425553L, 430832L, 435158L, 452568L, 456038L, 
461120L), quinquennio_ok = c("1995\n2000", "1996\n2001", "1997\n2002", 
"1998\n2003", "1999\n2004", "2000\n2005", "2001\n2006", "2002\n2007", 
"2003\n2008", "2004\n2009", "2005\n2010", "2006\n2011", "2007\n2012", 
"2008\n2013", "2009\n2014", "2010\n2015", "2011\n2016", "2012\n2017", 
"2013\n2018"), primo_anno_label = c("588k", "586k", "576k", "562k", 
"585k", "585k", "593k", "617k", "620k", "613k", "616k", "627k", 
"618k", "604k", "597k", "598k", "614k", "610k", "612k"), quinto_anno_label = c("372k", 
"380k", "387k", "393k", "401k", "394k", "396k", "413k", "417k", 
"424k", "426k", "431k", "427k", "425k", "430k", "435k", "452k", 
"456k", "461k")), .Names = c("quinquennio", "primo_anno", "quinto_anno", 
"quinquennio_ok", "primo_anno_label", "quinto_anno_label"), row.names = c(NA, 
-19L), spec = structure(list(cols = structure(list(quinquennio = structure(list(), class = c("collector_character", 
"collector")), primo_anno = structure(list(), class = c("collector_integer", 
"collector")), quinto_anno = structure(list(), class = c("collector_integer", 
"collector"))), .Names = c("quinquennio", "primo_anno", "quinto_anno"
)), default = structure(list(), class = c("collector_guess", 
"collector"))), .Names = c("cols", "default"), class = "col_spec"), class = c("tbl_df", 
"tbl", "data.frame"))

这是代码:

# Original attempt: both series are drawn, but no legend appears.
# Every colour below is a constant set outside aes(), so ggplot2 never builds
# a colour scale; with no scale there is no legend for show.legend = TRUE to
# display.
ggplot(dati) +
  # Value labels: above the first-year points, below the fifth-year points.
  geom_text(aes(x=quinquennio_ok, y=primo_anno, label=primo_anno_label, vjust=-1.1), color="dark blue") +
  geom_text(aes(x=quinquennio_ok, y=quinto_anno, label=quinto_anno_label, vjust=2), color="dark red") +
  # Vertical connector per window, shortened at both ends (4000 below the
  # upper value, 10000 above the lower one) so it does not touch the points.
  geom_segment(
    aes(x=quinquennio_ok,
        y=primo_anno-4000,
        xend=quinquennio_ok,
        yend=quinto_anno+10000),
    colour="dark blue", size=1, alpha=.4) +
  # First-year values (blue circles).
  geom_point(aes(x=quinquennio_ok, 
                 y=primo_anno),
             size=4, alpha=.5, color="dark blue", show.legend = TRUE) +
  # Filled downward triangle (shape 25) placed 8000 above the fifth-year
  # value, acting as an arrowhead at the lower end of the connector.
  geom_point(aes(x=quinquennio_ok,
                 y=quinto_anno+8000),
             size=3, alpha=.5, fill="dark blue", colour="dark blue", shape=25) +
  # Fifth-year values (red circles).
  geom_point(aes(x=quinquennio_ok,
                 y=quinto_anno),
             size=3, alpha=.5, color="dark red", show.legend = TRUE) +
  theme_minimal() +
  # A numeric legend.position puts the legend inside the panel (here: centre).
  # All grid lines are blanked, then only the minor horizontal ones are
  # switched back on in light grey.
  theme(legend.position = c(.5,.5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line.y = element_line(color="light grey"),
        panel.grid.minor.y = element_line(color="light grey")) +
  # Y axis in thousands with a "k" suffix, clipped to 350k-650k.
  scale_y_continuous(breaks=seq(300000,700000,50000), limits=c(350000,650000),
                     labels = scales::unit_format(unit="k",scale=.001,sep="")) +
  labs(x="Quinquenni",
       y="Studenti iscritti",
       title="Dispersione scolastica in Italia",
       subtitle="Dal 1995 al 2018",
       caption="Fonte: Report TuttoScuola 2018")

这就是我得到的:

enter image description here

我只想要一个图例来说明：蓝点是五年期第一年的入学人数，红点是最后一年的入学人数。

如果我离解决方案太远了,我将不胜感激一些参考链接,以更多地研究ggplot2和图例。

1 个答案:

答案 0 :(得分:0)

有几种方法可以解决这个问题，这里给出两种。第一种是把数据从宽格式转换为长格式，使每个观测值各占一行：

library(tidyverse)

# Reshape `dati` from wide to long: keep the two window-name columns and stack
# primo_anno / quinto_anno into a `year` (series name) and `value` (count) pair.
dati_long <- gather(
  select(dati, quinquennio, quinquennio_ok, primo_anno, quinto_anno),
  key = "year", value = "value",
  primo_anno, quinto_anno
)

# Rebuild the text label from the count, truncated to whole thousands.
dati_long <- mutate(dati_long, label = paste0(floor(value / 1000), "k"))

# Printed result:
dati_long
# A tibble: 38 x 5
#    quinquennio quinquennio_ok year        value label
#    <chr>       <chr>          <chr>       <int> <chr>
#  1 1995-2000   "1995\n2000"   primo_anno 588402 588k 
#  2 1996-2001   "1996\n2001"   primo_anno 586231 586k 
#  3 1997-2002   "1997\n2002"   primo_anno 576434 576k 
#  4 1998-2003   "1998\n2003"   primo_anno 562444 562k 
#  5 1999-2004   "1999\n2004"   primo_anno 585496 585k 
#  6 2000-2005   "2000\n2005"   primo_anno 585351 585k 
#  7 2001-2006   "2001\n2006"   primo_anno 593010 593k 
#  8 2002-2007   "2002\n2007"   primo_anno 617309 617k 
#  9 2003-2008   "2003\n2008"   primo_anno 620897 620k 
# 10 2004-2009   "2004\n2009"   primo_anno 613388 613k 
# ... with 28 more rows

然后,您可以根据ggplot中的年份分配颜色:

# Long-format version: colour is mapped to `year` inside aes(), so ggplot2
# builds a colour scale and draws the legend without any manual work.
ggplot(dati_long,
       aes(x = quinquennio_ok, y = value, label = label, color = year)) +
  # One point per observation, coloured by series.
  geom_point(size = 3) +
  # Labels above the first-year points and below the fifth-year points;
  # excluded from the legend so the keys show points only.
  geom_text(aes(vjust = ifelse(year == "primo_anno", -1.1, 2)),
            show.legend = FALSE) +
  # Connector between the two values of each window, shortened at both ends
  # so it does not touch the points. The constant colour overrides the
  # inherited `color = year` mapping for this layer.
  geom_line(aes(group = quinquennio_ok,
                y = ifelse(year == "primo_anno", value - 4000, value + 10000)),
            color = "dark blue", size = 1, alpha = .4) +
  # Filled downward triangle (shape 25) 8000 above each fifth-year value,
  # acting as an arrowhead; `data` is a function applied to the plot data.
  geom_point(data = . %>% filter(year == "quinto_anno"),
             mapping = aes(x = quinquennio_ok, y = value + 8000),
             size = 3, alpha = .5, fill = "dark blue", colour = "dark blue",
             shape = 25) +
  # Named values bind each colour to its series explicitly instead of relying
  # on the alphabetical order of the levels of `year`.
  scale_color_manual(values = c(primo_anno = "dark blue",
                                quinto_anno = "dark red")) +
  theme_minimal() +
  # Legend inside the panel (centre); all grid lines blanked, then only the
  # minor horizontal ones switched back on.
  theme(legend.position = c(.5, .5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line.y = element_line(color = "light grey"),
        panel.grid.minor.y = element_line(color = "light grey")) +
  # Y axis in thousands with a "k" suffix, clipped to 350k-650k.
  scale_y_continuous(breaks = seq(300000, 700000, 50000),
                     limits = c(350000, 650000),
                     labels = scales::unit_format(unit = "k", scale = .001,
                                                  sep = "")) +
  labs(x = "Quinquenni", y = "Studenti iscritti",
       title = "Dispersione scolastica in Italia",
       subtitle = "Dal 1995 al 2018",
       caption = "Fonte: Report TuttoScuola 2018")

enter image description here

第二种方法无需转换数据：只要在希望出现在图例中的图层里，于 aes() 内部给 color 指定一个“虚拟”的美学映射（一个字符串常量）即可：

## Wide-format version: no reshaping needed. The legend is created by mapping
## colour to a constant string inside aes() on the layers that should be
## listed in it.
ggplot(dati) +
  geom_text(aes(x = quinquennio_ok, y = primo_anno,
                label = primo_anno_label, vjust = -1.1), color = "dark blue") +
  geom_text(aes(x = quinquennio_ok, y = quinto_anno,
                label = quinto_anno_label, vjust = 2), color = "dark red") +
  geom_segment(aes(x = quinquennio_ok, y = primo_anno - 4000,
                   xend = quinquennio_ok, yend = quinto_anno + 10000),
               colour = "dark blue", size = 1, alpha = .4) +
  ## The color is added inside of aes and given a name as a string
  ## which will be displayed in the legend
  geom_point(aes(x = quinquennio_ok, y = primo_anno, color = "primo"),
             size = 4, alpha = .5, show.legend = TRUE) +
  ## Arrowhead triangle: colour stays a constant outside aes, so this layer
  ## adds no legend entry
  geom_point(aes(x = quinquennio_ok, y = quinto_anno + 8000),
             size = 3, alpha = .5, fill = "dark blue",
             colour = "dark blue", shape = 25) +
  ## The color is added inside of aes and given a name as a string
  ## which will be displayed in the legend
  geom_point(aes(x = quinquennio_ok, y = quinto_anno, color = "quinto"),
             size = 3, alpha = .5,
             show.legend = TRUE) +
  ## Here the colors are defined manually and a name for the legend is given.
  ## The values are named after the strings used in aes above, so each colour
  ## is bound to its key explicitly rather than by alphabetical order
  scale_color_manual("year",
                     values = c(primo = "dark blue", quinto = "dark red")) +
  theme_minimal() +
  theme(legend.position = c(.5, .5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line.y = element_line(color = "light grey"),
        panel.grid.minor.y = element_line(color = "light grey")) +
  scale_y_continuous(breaks = seq(300000, 700000, 50000),
                     limits = c(350000, 650000),
                     labels = scales::unit_format(unit = "k", scale = .001,
                                                  sep = "")) +
  labs(x = "Quinquenni",
       y = "Studenti iscritti",
       title = "Dispersione scolastica in Italia",
       subtitle = "Dal 1995 al 2018",
       caption = "Fonte: Report TuttoScuola 2018")