如何创建将点数据叠加在不等间距矩阵色块图上的图?

时间:2018-03-20 12:25:40

标签: r ggplot2 visualization

我想创建一个类似于附加图像的图,其中点数据放在矩阵颜色图上:

Excel plot overlaying point chart over coloured cells

我有 x 和 y 两组数据。然后,我通过统计落在每个 x–y 分箱(bin)组合中的点数来创建矩阵 xy_bincount。各分箱的宽度不相等,如附图所示。

在R,Matlab或Python中创建这个图会更容易吗?

感谢您的帮助!

# Sample data from the question: 18 labelled points.
x <- c(
  2.56481, 2.11009, 1.72927, 1.47803, 1.74279, 3.29555,
  3.66061, 2.63349, 2.43808, 2.13, 3.09267, 2.3555,
  2.48811, 4.05344, 3.38401, 2.69907, 2.26378, 2.71978
)
y <- c(
  -1.26044, 13.6098, 0.710325, -4.27657, 11.1908, -7.2431,
  -3.19167, 20.7423, 10.009, 32.12, 42.6192, 13.9598,
  -0.412724, -20.3846, -6.97259, -14.2046, 8.30859, 0.0386572
)
# One label per point: "A" through "R".
xylabels <- LETTERS[1:18]

# Per-bin values as supplied in the question, written out row by row
# (5 rows x 4 columns).
xy_bincount <- matrix(
  c(
    0, 0,  0,  6,
    0, 0,  6, 12,
    0, 0, 24,  6,
    0, 0, 29,  0,
    0, 0, 12,  6
  ),
  nrow = 5, ncol = 4, byrow = TRUE
)

1 个答案:

答案 0 :(得分:5)

你可以尝试

library(tidyverse)
# Edges of the unequally spaced bins. cut() labels every bin with its upper
# edge, so a bin label can be turned back into a plotting coordinate.
y_breaks <- c(-25, -15, -5, 5, 15, 55)
x_breaks <- c(0, 0.5, 1.5, 3, 4.5)

# Convert factor labels back to the numbers they spell out. Going through
# as.character() is required: as.numeric() on a factor returns level codes.
foo <- function(x) as.numeric(as.character(x))

# Scatter plot of the points with each bin's share of all points printed in
# red near the bin's corner at the upper x / upper y edge.
tibble(x, y) %>%
  mutate(
    y_bins = cut(y, breaks = y_breaks, labels = y_breaks[-1],
                 include.lowest = TRUE),
    x_bins = cut(x, breaks = x_breaks, labels = x_breaks[-1],
                 include.lowest = TRUE)
  ) %>%
  add_count(y_bins, x_bins) %>%
  # n is the per-bin count added above; n() is the total number of points.
  mutate(percent = n / n()) %>%
  ggplot(aes(x, y)) +
  geom_point() +
  geom_text(
    # One row per bin combination; bins without points get a share of 0.
    data = . %>%
      select(y_bins, x_bins, percent) %>%
      complete(y_bins, x_bins, fill = list(percent = 0)) %>%
      distinct(),
    aes(
      x = foo(x_bins) - 0.15,
      y = foo(y_bins) - 2,
      label = scales::percent(percent)
    ),
    color = "red"
  ) +
  scale_x_continuous(
    breaks = x_breaks, limits = c(0, 4.5), expand = c(0, 0),
    minor_breaks = NULL, position = "top"
  ) +
  scale_y_reverse(
    breaks = y_breaks, limits = c(55, -25), expand = c(0, 0),
    minor_breaks = NULL
  )

enter image description here

对于矩形,您可以使用此硬编码解决方案。

# Rectangle solution: shade every x/y bin by its share of all points.
# Uses x, y, x_breaks, y_breaks and foo() defined earlier.

# Per-point bin assignment plus the bin's share of all points, formatted as a
# percent label (a character column).
df1 <- tibble(x, y) %>%
  mutate(
    y_bins = cut(y, breaks = y_breaks, labels = y_breaks[-1],
                 include.lowest = TRUE),
    x_bins = cut(x, breaks = x_breaks, labels = x_breaks[-1],
                 include.lowest = TRUE)
  ) %>%
  add_count(y_bins, x_bins) %>%
  mutate(percent = scales::percent(n / n()))

# Grid size derived from the breaks instead of hard-coded indices.
n_x_bins <- length(x_breaks) - 1
n_y_bins <- length(y_breaks) - 1

# calculate the positions for the rectangle, e.g. xmin, ymin and xmax, ymax
df2 <- df1 %>%
  select(y_bins, x_bins, percent) %>%
  # percent is character, so empty bins are filled with a string; a numeric 0
  # does not match the column type.
  complete(y_bins, x_bins, fill = list(percent = "0%")) %>%
  distinct() %>%
  # bind_cols() matches rows by position, so pin the order explicitly:
  # y bins vary slowest, x bins fastest, the same layout as the bounds below.
  arrange(y_bins, x_bins) %>%
  bind_cols(
    tibble(
      y_start = rep(head(y_breaks, -1), each = n_x_bins),
      y_end   = rep(y_breaks[-1], each = n_x_bins),
      x_start = rep(head(x_breaks, -1), times = n_y_bins),
      x_end   = rep(x_breaks[-1], times = n_y_bins)
    )
  ) %>%
  # Numeric version of the label, used for the fill gradient.
  mutate(percent_gr = as.numeric(gsub("%", "", percent, fixed = TRUE)))

# and the plot
df1 %>%
  ggplot(aes(x, y)) +
  geom_rect(
    data = df2,
    aes(xmin = x_start, xmax = x_end, ymin = y_start, ymax = y_end,
        fill = percent_gr),
    alpha = 0.8, inherit.aes = FALSE
  ) +
  geom_point() +
  # df2 already holds one row per bin, so it doubles as the label data.
  geom_text(
    data = df2,
    aes(x = foo(x_bins) - 0.15, y = foo(y_bins) - 2, label = percent)
  ) +
  scale_x_continuous(
    breaks = x_breaks, limits = c(0, 4.5), expand = c(0, 0),
    minor_breaks = NULL, position = "top"
  ) +
  scale_y_reverse(
    breaks = y_breaks, limits = c(55, -25), expand = c(0, 0),
    minor_breaks = NULL
  ) +
  scale_fill_gradient(low = "white", high = "red") +
  theme_linedraw()

enter image description here

最后,您可以使用geom_tile

尝试一体化解决方案
# All-in-one pipeline: bin the points, then draw one tile per bin (filled by
# the bin's share of points), the points themselves and the share labels.
# Uses x, y, x_breaks, y_breaks and foo() defined earlier.
tibble(x, y) %>%
  mutate(
    y_bins = cut(y, breaks = y_breaks, labels = y_breaks[-1],
                 include.lowest = TRUE),
    x_bins = cut(x, breaks = x_breaks, labels = x_breaks[-1],
                 include.lowest = TRUE)
  ) %>%
  add_count(y_bins, x_bins) %>%
  # Share of all points per bin, as a percent label (character column).
  mutate(percent = scales::percent(n / n())) %>%
  ggplot(aes(x, y)) +
  geom_tile(
    data = . %>%
      select(y_bins, x_bins, percent) %>%
      # percent is character, so empty bins are filled with a string.
      complete(y_bins, x_bins, fill = list(percent = "0%")) %>%
      distinct() %>%
      mutate(
        # Level i of a bin factor is the interval between breaks i and i + 1,
        # so its size is the i-th difference of the breaks.
        w = diff(x_breaks)[as.integer(x_bins)],
        h = diff(y_breaks)[as.integer(y_bins)],
        # geom_tile() is positioned by its centre; the label is the upper edge.
        x = foo(x_bins) - w / 2,
        y = foo(y_bins) - h / 2,
        percent_gr = as.numeric(gsub("%", "", percent, fixed = TRUE))
      ),
    aes(x = x, y = y, width = w, height = h, fill = percent_gr)
  ) +
  geom_point() +
  geom_text(
    data = . %>%
      select(y_bins, x_bins, percent) %>%
      complete(y_bins, x_bins, fill = list(percent = "0%")) %>%
      distinct(),
    aes(x = foo(x_bins) - 0.15, y = foo(y_bins) - 2, label = percent)
  ) +
  scale_x_continuous(
    breaks = x_breaks, limits = c(0, 4.5), expand = c(0, 0),
    minor_breaks = NULL, position = "top"
  ) +
  scale_y_reverse(
    breaks = y_breaks, limits = c(55, -25), expand = c(0, 0),
    minor_breaks = NULL
  ) +
  scale_fill_gradient(low = "white", high = "red") +
  theme_linedraw()