ggplot 故障排除:错误 "Aesthetics must be either length 1 or the same as the data (24): x, y, fill"(美学映射的长度必须为 1,或与数据行数(24)相同)

时间:2018-06-12 13:52:00

标签: r ggplot2

示例数据:

> dput(droplevels(data[1:50, ]))
    structure(list(QtySold = c(3L, -1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 6L, 2L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
1L, 1L, 2L, 1L, 2L, 2L, 4L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 
2L, -1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 2L, 
2L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, 1L, 
1L, 1L, 4L, 5L, 1L, 3L, 1L, 2L, 30L, 2L, 10L, 6L, 10L, 8L, 10L, 
20L, 10L, 5L, 1L, 3L, 30L, 20L, 10L, 10L, 90L, 10L, 12L, 10L, 
15L, 10L, 10L, 20L, 10L, 10L, 32L, 10L, 10L, 20L, 1L, 2L, 1L, 
2L, 2L, 5L, 2L, 1L, 3L, 3L, 1L, 1L, 2L, 4L, 2L, 4L, 1L, 1L, 2L, 
2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 3L, 
1L, 4L, 2L, 1L, 4L, 4L, 5L, 1L, 3L, 2L, 2L, 4L, 1L, 3L, 3L, 1L, 
3L, 6L, 7L, 1L, 3L, 7L, 2L, 1L, 4L, 2L, 3L, 3L, 5L, 2L, 1L, 1L, 
4L, 6L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 4L, 1L, 3L, 2L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 
5L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 2L, 2L, 3L, 4L, 1L, 3L, 2L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 4L, 2L, 1L, 
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 
2L, 1L, 3L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
1L, 2L, 3L, 1L, -1L, 1L, 1L, 1L, 2L, 2L, 2L, 10L, 1L, 1L, -4L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
4L, -1L, 2L, 3L, 1L, 1L, 5L, 1L, 10L, 1L, 1L, 4L, 1L, 1L, 1L, 
2L, 30L, 2L, 12L, 20L, 5L, 1L, 5L, 3L, 4L, 12L, 6L, 10L, 8L, 
4L, 6L, 8L, 3L, 6L, 1L, -1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 3L, 
8L, 6L, 10L, 6L, 15L, 10L, 6L, 8L, 10L, 6L, 20L, 10L, 10L, 10L, 
10L, 1L, 10L, 10L, 5L, 10L, 20L, 4L, 12L, 10L, 10L, 5L, 10L, 
6L, 1L, 1L, 2L, 4L, 1L, 1L, 1L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
2L, 3L, 4L, 13L, 4L, 3L, 1L, 12L, 1L, 10L, 1L, 6L, 1L, 1L, 3L, 
10L, 2L, 1L, 1L, 1L, 1L, 1L, -1L, 1L, 1L, 1L, 1L, 8L, 12L, 6L, 
12L, 4L, 2L, 3L, 24L, 10L, 8L, 6L, 1L, 3L, 1L, 1L, 1L, 6L, 1L, 
1L, 1L, -1L, 1L, 2L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 4L, 1L, 10L, 
-1L), PRODUCT_SUB_LINE_DESCR = structure(c(3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 
1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Handpieces", 
"PRIVATE LABEL", "SUNDRY"), class = "factor"), MAJOR_CATEGORY_KEY = structure(c(23L, 
23L, 23L, 23L, 21L, 21L, 21L, 23L, 23L, 23L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 25L, 19L, 19L, 4L, 4L, 9L, 9L, 
9L, 9L, 9L, 9L, 23L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 17L, 16L, 4L, 4L, 4L, 4L, 11L, 11L, 4L, 4L, 4L, 
4L, 11L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 21L, 12L, 12L, 12L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 13L, 13L, 7L, 18L, 7L, 
19L, 19L, 19L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 13L, 13L, 11L, 11L, 6L, 12L, 10L, 10L, 
4L, 4L, 4L, 4L, 4L, 4L, 10L, 19L, 19L, 19L, 19L, 19L, 4L, 4L, 
11L, 11L, 11L, 11L, 11L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 11L, 16L, 16L, 16L, 16L, 16L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 16L, 16L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 21L, 4L, 
4L, 4L, 19L, 19L, 19L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 17L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 12L, 12L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 13L, 
4L, 4L, 4L, 4L, 4L, 4L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 18L, 
21L, 21L, 18L, 18L, 18L, 18L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 19L, 19L, 18L, 18L, 4L, 11L), .Label = c("AIR ", 
"AML ", "ANS ", "ASE ", "ASP ", "B&D ", "BLE ", "C&P ", "CBL ", 
"CEM ", "CMP ", "CRN ", "END ", "FNP ", "GYP ", "HND ", "IMP ", 
"INS ", "OTH ", "P&P ", "PRE ", "RTC ", "SME ", "UCL ", "XRY "
), class = "factor")), row.names = c(NA, 500L), class = "data.frame")

以及相关代码

     > newdf = subset(data, select = c(QtySold, PRODUCT_SUB_LINE_DESCR, MAJOR_CATEGORY_KEY))
> sample = newdf %>% 
+   filter(QtySold < 0) %>%
+   group_by(PRODUCT_SUB_LINE_DESCR,MAJOR_CATEGORY_KEY) %>%
+   summarise(returns = sum(QtySold)) %>%
+   spread(PRODUCT_SUB_LINE_DESCR,returns, fill=0) %>%
+   mutate(total_returns = `PRIVATE LABEL` + SUNDRY + Handpieces,
+          PercentageReturn_PL = round(`PRIVATE LABEL`/ total_returns*100,1))
> sample %>%
+               group_by(PRODUCT_SUB_LINE_DESCR) %>%
+               summarise(count=n()) %>%
+               mutate(percent= paste0(round(count/sum(count)*100,1),'%'))
Error in grouped_df_impl(data, unname(vars), drop) : 
  Column `PRODUCT_SUB_LINE_DESCR` is unknown 

我的情景:

  • 我有一个包含三类产品的数据集:SUNDRY(杂项)、Handpieces(手持器械)和 PRIVATE LABEL(自有品牌)。
  • 我按条件 QtySold < 0 过滤了数据集,并按“PRODUCT_SUB_LINE_DESCR”和“MAJOR_CATEGORY_KEY”对过滤后的数据进行分组。
  • 我想查看退货商品中属于 PRIVATE LABEL(自有品牌)类别的百分比。我的代码中有一条 mutate 命令,用于新增列“PercentageReturn_PL”。
  • 最后,完成计算之后,我想以 MAJOR_CATEGORY_KEY 为 Y 轴绘制整体结果,用堆积条形图显示每个 MAJOR_CATEGORY_KEY 中三类产品的百分比构成。

如何解决此错误?

> sample %>%
+               group_by(PRODUCT_SUB_LINE_DESCR) %>%
+               summarise(count=n()) %>%
+               mutate(percent= paste0(round(count/sum(count)*100,1),'%'))
Error in grouped_df_impl(data, unname(vars), drop) : 
  Column `PRODUCT_SUB_LINE_DESCR` is unknown 

1 个答案:

答案 0 :(得分:2)

问题在于:您先用 spread() 把 PRODUCT_SUB_LINE_DESCR 列展开成了多列(展开之后该列就不存在了),随后却又试图按这一列分组。另外,绘图部分也有很多多余的内容。

以下是一种按主要类别(MAJOR_CATEGORY_KEY)计算 PRIVATE LABEL 退货占比的方法:

# Build one row per MAJOR_CATEGORY_KEY from the return rows (QtySold < 0):
#   returns            - total returned quantity in that category
#   percent_returns_pl - share of that total coming from "PRIVATE LABEL" rows
# `returns` is defined first so the second summary can divide by it.
sample_long <- newdf %>%
    filter(QtySold < 0) %>%
    group_by(MAJOR_CATEGORY_KEY) %>%
    summarize(
        returns = sum(QtySold),
        percent_returns_pl = sum(QtySold[PRODUCT_SUB_LINE_DESCR == "PRIVATE LABEL"]) / returns
    )

然后我们就可以轻松地绘图:

# Bar chart: one bar per MAJOR_CATEGORY_KEY, bar height = percent_returns_pl.
ggplot(sample_long, aes(x = MAJOR_CATEGORY_KEY, y = percent_returns_pl)) +
    geom_col() +
    # Print each bar's value as a percentage label; vjust = -0.5 nudges it above the bar.
    geom_text(
        aes(label = scales::percent(percent_returns_pl)),
        vjust = -0.5
    ) +
    # Format the y-axis tick labels as percentages as well.
    scale_y_continuous(labels = scales::percent)

enter image description here

用这份样本数据画出来的图比较单调,换成完整数据应该会更有意思。