使用ggridges可视化泊松随机样本组

时间:2017-12-15 03:33:54

标签: r ggplot2 visualization data-visualization ggridges

我有两组数据,都在一个数据框中。第一组与位置1中收集的数据相关,第二组与位置2相关。每个位置具有5个月的不同计数数据(列value)。

# DataSet
-----------------
# Example data: 100 integer counts, 10 observations for each of Months 1-5 at
# each of two locations. Rows are ordered location "1" (Months 1..5), then
# location "2" (Months 1..5).
rp_data <- data.frame(
  Month = factor(rep(rep(1:5, each = 10L), times = 2L), levels = 1:5),
  location = rep(c("1", "2"), each = 50L),
  value = c(
    # location 1
    0L, 1L, 1L, 1L, 2L, 1L, 0L, 0L, 1L, 1L,   # Month 1
    3L, 2L, 1L, 4L, 1L, 3L, 1L, 1L, 1L, 1L,   # Month 2
    2L, 2L, 1L, 0L, 2L, 4L, 3L, 5L, 5L, 0L,   # Month 3
    4L, 3L, 3L, 4L, 2L, 5L, 2L, 3L, 10L, 6L,  # Month 4
    5L, 6L, 4L, 6L, 4L, 5L, 6L, 5L, 3L, 7L,   # Month 5
    # location 2
    1L, 1L, 1L, 1L, 0L, 0L, 2L, 1L, 2L, 0L,   # Month 1
    2L, 3L, 4L, 1L, 2L, 1L, 2L, 0L, 2L, 2L,   # Month 2
    4L, 4L, 5L, 1L, 4L, 5L, 4L, 5L, 1L, 4L,   # Month 3
    3L, 7L, 7L, 4L, 2L, 5L, 4L, 1L, 5L, 3L,   # Month 4
    7L, 3L, 4L, 8L, 5L, 7L, 1L, 1L, 6L, 3L    # Month 5
  ),
  # Keep `location` as character on R < 4.0 as well.
  stringsAsFactors = FALSE
)

我参照 ggridges 示例网页(examples webpage)中的示例,用下面的代码来显示不同月份的计数值分布。

# Plot 1 , filtering data related to location = 1
#---------------

# Ridgeline histogram of the counts at location 1: one ridge per Month, with
# bins one count wide so every bar corresponds to a single integer value.
ggplot(rp_data[rp_data$location == "1", ],
       aes(x = value, y = Month, group = Month)) +
  geom_density_ridges2(aes(fill = Month), stat = "binline",
                       binwidth = 1, scale = 0.95) +
  # Write each non-empty bin's count inside its bar. The y position is the
  # ridge's group index plus the bar height, rescaled with the same 0.95
  # factor that is passed as `scale` to the ridges above.
  geom_text(stat = "bin",
            aes(y = group + 0.95 * (..count.. / max(..count..)),
                label = ifelse(..count.. > 0, ..count.., "")),
            vjust = 1.4, size = 3, color = "white", binwidth = 1) +
  scale_x_continuous(breaks = 0:12, limits = c(-0.5, 13), expand = c(0, 0),
                     name = "random value") +
  # No `labels` override here: the previous hard-coded c("5.0", ..., "1.0")
  # ran opposite to the factor levels "1".."5", so Month 1 was shown as "5.0".
  # The axis now uses the Month levels themselves.
  scale_y_discrete(expand = c(0.01, 0), name = "Month") +
  scale_fill_cyclical(values = c("#0000B0", "#7070D0")) +
  labs(title = "Poisson random samples location 1 different Month",
       subtitle = "sample size n=10") +
  # NOTE(review): on ggplot2 >= 3.3.0 this suppresses the y axis entirely,
  # which also hides the Month labels -- confirm that is intended.
  guides(y = "none") +
  theme_ridges(grid = FALSE) +
  theme(axis.title.x = element_text(hjust = 0.5),
        axis.title.y = element_text(hjust = 0.5))

# Plot 2 , filtering data related to location = 2
#---------------

# Ridgeline histogram of the counts at location 2: one ridge per Month, with
# bins one count wide so every bar corresponds to a single integer value.
ggplot(rp_data[rp_data$location == "2", ],
       aes(x = value, y = Month, group = Month)) +
  geom_density_ridges2(aes(fill = Month), stat = "binline",
                       binwidth = 1, scale = 0.95) +
  # Write each non-empty bin's count inside its bar. The y position is the
  # ridge's group index plus the bar height, rescaled with the same 0.95
  # factor that is passed as `scale` to the ridges above.
  geom_text(stat = "bin",
            aes(y = group + 0.95 * (..count.. / max(..count..)),
                label = ifelse(..count.. > 0, ..count.., "")),
            vjust = 1.4, size = 3, color = "white", binwidth = 1) +
  scale_x_continuous(breaks = 0:12, limits = c(-0.5, 13), expand = c(0, 0),
                     name = "random value") +
  # No `labels` override here: the previous hard-coded c("5.0", ..., "1.0")
  # ran opposite to the factor levels "1".."5", so Month 1 was shown as "5.0".
  # The axis now uses the Month levels themselves.
  scale_y_discrete(expand = c(0.01, 0), name = "Month") +
  scale_fill_cyclical(values = c("#0000B0", "#7070D0")) +
  labs(title = "Poisson random samples location 2 different Month",
       subtitle = "sample size n=10") +
  # NOTE(review): on ggplot2 >= 3.3.0 this suppresses the y axis entirely,
  # which also hides the Month labels -- confirm that is intended.
  guides(y = "none") +
  theme_ridges(grid = FALSE) +
  theme(axis.title.x = element_text(hjust = 0.5),
        axis.title.y = element_text(hjust = 0.5))

图1的结果:

enter image description here

我的问题是如何将这两个图组合在一起,类似于这个示例中所示的叠加图:

enter image description here

我不想把它们分别画在两张单独的图上。

1 个答案:

答案 0 :(得分:2)

您需要创建一个同时包含 Month 和 location 的分组变量,可以用 paste0(Month, location) 来实现。目前我先省略了文本标签;如果要加上,还需要多花些心思。(不过我认为加上标签会让这张图显得过于杂乱。)

# Overlay both locations in a single ridgeline histogram: one ridge row per
# Month, with a separate semi-transparent histogram per Month/location pair.
ggplot(rp_data,
       aes(x = value, y = Month,
           # paste0() gives one key per Month/location pair ("11", "12", ...),
           # so each pair is binned and filled as its own group.
           group = paste0(Month, location),
           fill = paste0(Month, location))) +
  # alpha < 1 keeps the histogram drawn underneath visible where they overlap.
  geom_density_ridges2(stat = "binline", binwidth = 1,
                       scale = 0.95, alpha = 0.7) +
  scale_x_continuous(breaks = 0:12, limits = c(-0.5, 13),
                     expand = c(0, 0), name = "random value") +
  # No `labels` override here: the previous hard-coded c("5.0", ..., "1.0")
  # ran opposite to the factor levels "1".."5", so Month 1 was shown as "5.0".
  scale_y_discrete(expand = c(0.01, 0), name = "Month") +
  # Four colours cycled over the Month/location keys; with the keys in sorted
  # order, location 1 gets the blue shades and location 2 the red shades.
  scale_fill_cyclical(values = c("#0000B0", "#B00000",
                                 "#7070D0", "#FC5E5E")) +
  # Title corrected: this figure shows both locations, not only location 1.
  labs(title = "Poisson random samples, locations 1 and 2, by Month",
       subtitle = "sample size n=10") +
  # NOTE(review): on ggplot2 >= 3.3.0 this suppresses the y axis entirely,
  # which also hides the Month labels -- confirm that is intended.
  guides(y = "none") +
  theme_ridges(grid = FALSE, center = TRUE)

enter image description here

编辑:现在使用文字标签。

# Combined ridgeline histogram of both locations, with the bin counts printed
# inside the bars. One ridge row per Month; one histogram per Month/location.
ggplot(rp_data,
       aes(x = value, y = Month,
           group = paste0(Month, location),
           fill = paste0(Month, location))) +
  geom_density_ridges2(stat = "binline", binwidth = 1,
                       scale = 0.95, alpha = 0.7) +
  # There are two groups per Month (one per location), so ceiling(group / 2)
  # maps group indices 1..10 back onto the Month rows 1..5. The bar height is
  # rescaled with the same 0.95 factor passed as `scale` to the ridges.
  geom_text(stat = "bin",
            aes(y = ceiling(group / 2) + 0.95 * (..count.. / max(..count..)),
                label = ifelse(..count.. > 0, ..count.., ""),
                color = location),
            vjust = 1.4, size = 3, binwidth = 1, fontface = "bold") +
  scale_x_continuous(breaks = 0:12, limits = c(-0.5, 13), expand = c(0, 0),
                     name = "random value") +
  # No `labels` override here: the previous hard-coded c("5.0", ..., "1.0")
  # ran opposite to the factor levels "1".."5", so Month 1 was shown as "5.0".
  scale_y_discrete(expand = c(0.01, 0), name = "Month") +
  scale_fill_cyclical(values = c("#0000B0", "#B00000",
                                 "#7070D0", "#FC5E5E")) +
  # Label colour follows location: white for "1", black for "2".
  scale_color_cyclical(values = c("white", "black")) +
  # Title corrected: this figure shows both locations, not only location 1.
  labs(title = "Poisson random samples, locations 1 and 2, by Month",
       subtitle = "sample size n=10") +
  # NOTE(review): on ggplot2 >= 3.3.0 this suppresses the y axis entirely,
  # which also hides the Month labels -- confirm that is intended.
  guides(y = "none") +
  theme_ridges(grid = FALSE, center = TRUE)

enter image description here

再说一次,我不确定这是不是个好主意,但做法就是这样。