R / ggplot中的复杂图表与正确的图例显示

时间:2018-02-12 17:05:41

标签: r ggplot2

这是我向StackExchange提出的第一个问题,我已经搜索过有用的答案,但我并没有真正把我带到我想去的地方。

这是一个堆积条形图,结合点图表,结合一条线。

这是我的代码:

theme_set(theme_light())

library(lubridate)

FM <- as.Date('2018-02-01')

x.range <- c(FM - months(1) - days(1) - days(day(FM) - 1), FM - days(day(FM) - 1) + months(1))
x.ticks <- seq(x.range[1] + days(1), x.range[2], by = 2)

#populate example data
preds <- data.frame(FM = FM, DATE = seq(x.range[1] + days(1), x.range[2] - days(1), by = 1))
preds <- data.frame(preds, S_O = round(seq(1, 1000000, by = 1000000/nrow(preds))))
preds <- data.frame(preds, S = round(ifelse(month(preds$FM) == month(preds$DATE), day(preds$DATE) / 30.4, 0) * preds$S_O))
preds <- data.frame(preds, O = preds$S_O - preds$S)
preds <- data.frame(preds, pred_sales = round(1000000 + rnorm(nrow(preds), 0, 10000)))
preds$ma <- with(preds, stats::filter(pred_sales, rep(1/5, 5), sides = 1))

y.max <- ceiling(max(preds$pred_sales) / 5000) * 5000 + 15000


line.cols <- c(O = 'palegreen4', S = 'steelblue4', 
               P = 'maroon', MA = 'blue')
fill.cols <- c(O = 'palegreen3', S = 'steelblue3', 
               P = 'red')

p <- ggplot(data = preds, 
            mapping = aes(DATE, pred_sales))
p <- p + 
  geom_bar(data = reshape2::melt(preds[,c('DATE', 'S', 'O')], id.var = 'DATE'), 
           mapping = aes(DATE, value, group = 1, fill = variable, color = variable),
           width = 1, 
           stat = 'identity', 
           alpha = 0.5) +
  geom_point(mapping = aes(DATE, pred_sales, group = 2, fill = 'P', color = 'P'),
             shape = 22, #square
             alpha = 0.5,
             size = 2.5) +
  geom_line(data = preds[!is.na(preds$ma),],
            mapping = aes(DATE, ma, group = 3, color = 'MA'),
            alpha = 0.8,
            size = 1) +
  geom_text(mapping = aes(DATE, pred_sales, label = formatC(pred_sales / 1000, format = 'd', big.mark = ',')),
            angle = 90, 
            size = 2.75,
            hjust = 1.25,
            vjust = 0.4) +
  labs(title = sprintf('%s Sales Predictions - %s', 'Overall', format(FM, '%b %Y')), 
       x = 'Date', 
       y = 'Volume in MMlbs') +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.title = element_blank(),
        legend.position = 'bottom',
        legend.text = element_text(size = 8),
        legend.margin = margin(t = 0.25, unit = 'cm')) +
  scale_x_date(breaks = x.ticks, 
               date_labels = '%b %e',
               limits = x.range) + 
  scale_y_continuous(limits = c(0, y.max), 
                     labels = function(x) { formatC(x / 1000, format='d', big.mark=',') }) +
  scale_color_manual(values = line.cols,
                     breaks = c('MA'),
                     labels  = c(MA = 'Mvg Avg (5)')) +
  scale_fill_manual(values  = fill.cols,
                    breaks  = c('P', 'O', 'S'),
                    labels  = c(O = 'Open Orders', S = 'Sales', P = 'Predictions'))
p

它生成的图表是:

R ggplot 1

正如你所看到的,传说做了几件时髦的事情。它很接近,但并不完全。我只想要带有外部边框的盒子用于预测,开放订单和销售,而只有蓝色线条用于Mvg Avg(5)。

任何建议都将不胜感激。

谢谢!

1 个答案:

答案 0 :(得分:0)

还不算太晚,但是如果您仍然想了解这个问题,则可以使用以下方法。解释包含在代码中作为注释:

library(dplyr)

preds %>%

  # scale the values for ALL numeric columns in the dataset, before
  # passing the dataset to ggplot()
  mutate_if(is.numeric, ~./1000) %>% 

  # since x / y mappings are stated in the top level ggplot(), there's
  # no need to repeat them in the subsequent layers UNLESS you want to
  # override them
  ggplot(mapping = aes(x = DATE, y = pred_sales)) + 

  # 1. use data = . to inherit the top level data frame, & modify it on
  # the fly for this layer; this is neater as you are essentially 
  # using a single data source for the ggplot object.
  # 2. geom_col() is a more succinct way to say geom_bar(stat = "identity")
  # (I'm using tidyr rather than reshape package, since ggplot2 is a
  # part of the tidyverse packages, & the two play together nicely)
  geom_col(data = . %>% 
             select(S, O, DATE) %>% 
             tidyr::gather(variable, value, -DATE),
           aes(y = value, fill = variable, color = variable),
           width = 1, alpha = 0.5) +

  # don't show legend for this layer (o/w the fill / color legend would
  # include a square shape in the centre of each legend key)
  geom_point(aes(fill = 'P', color = 'P'),
             shape = 22, alpha = 0.5, size = 2.5, show.legend = FALSE) +

  # use data = . %>% ... as above.
  # since the fill / color aesthetic mappings from the geom_col layer would
  # result in a border around all fill / color legends, avoid it all together
  # here by hard coding the line color to "blue", & map its linetype instead
  # to create a separate linetype-based legend later.
  geom_line(data = . %>% na.omit(),
            aes(y = ma, linetype = 'MA'),
            color = "blue", alpha = 0.8, size = 1) +

  # scales::comma is a more succinct alternative to formatC for this use case
  geom_text(aes(label = scales::comma(pred_sales)),
            angle = 90, size = 2.75, hjust = 1.25, vjust = 0.4) +

  labs(title = sprintf('%s Sales Predictions - %s', 'Overall', format(FM, '%b %Y')), 
       x = 'Date', 
       y = 'Volume in MMlbs') +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.title = element_blank(),
        legend.position = 'bottom',
        legend.text = element_text(size = 8),
        legend.margin = margin(t = 0.25, unit = 'cm')) +
  scale_x_date(breaks = x.ticks, 
               date_labels = '%b %e',
               limits = x.range) + 

  # as above, scales::comma is more succinct
  scale_y_continuous(limits = c(0, y.max / 1000), 
                     labels = scales::comma) +

  # specify the same breaks & labels for the manual fill / color scales, so that
  # a single legend is created for both
  scale_color_manual(values = line.cols,
                     breaks = c('P', 'O', 'S'),
                     labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions')) +
  scale_fill_manual(values = fill.cols,
                    breaks = c('P', 'O', 'S'),
                    labels = c(O = 'Open Orders', S = 'Sales', P = 'Predictions')) +

  # create a separate line-only legend using the linetype mapping, with
  # value = 1 (i.e. unbroken line) & specified alpha / color to match the
  # geom_line layer
  scale_linetype_manual(values = 1,
                        label = 'Mvg Avg (5)',
                        guide = guide_legend(override.aes = list(alpha = 1,
                                                                 color = "blue")))

plot