Question

我正在尝试使用ggplot2 geom_raster函数创建一个热图。热图具有许多不同长度和层次的y标签。

以下代码生成一个示例。我编写了一个名为 cattrim 的函数，用于按层次拆分标签并截断其长度（如果已经有人写过类似的函数，我很乐意改用现成的实现）。

不幸的是，我一直未能弄清楚如何颠倒离散 y 轴标签的顺序，以使分类变量的层次结构易于阅读。最高层级的取值为 1、2、3，第二层级为 A、B、C、D，第三层级为 l、m、n、o。

理想情况下，它们将以以下方式显示：

1 A l
    m 
    n
  B m
    n
    o
2 A m
  C n
    o

R代码：

library(ggplot2)
library(dplyr)
library(stringr)

# Simulate hierarchical labels of the form "<digit>-<LETTER>-<letter>".
# Each component is shuffled independently (shorter components are recycled
# by paste0), and the combined labels are then sorted.
n <- 100
x <- sort(paste0(
  sample(rep(1:3, n)), "-",
  sample(rep(LETTERS[1:4], n)), "-",
  sample(rep(letters[10:15], n))
))

# One row per label: a random category in 1..10 and a positive count.
df <- tibble(
  lab = x,
  cat = ceiling(runif(length(x)) * 10),
  n = rpois(length(x), 3.4) + 1
)

# Total count per (label, category) cell; keep only cells with n > 10.
# summarize() drops only the last grouping variable, so the result would
# otherwise stay grouped by `lab`; ungroup() returns a plain tibble.
df2 <- df %>%
  group_by(lab, cat) %>%
  summarize(n = sum(n)) %>%
  ungroup() %>%
  filter(n > 10)

# Heatmap with the raw, untrimmed labels on the y axis.
heatmap_raw <- df2 %>%
  ggplot(aes(factor(cat), lab)) +
  geom_raster(aes(fill = n)) +
  theme_bw() +
  theme(axis.text.y = element_text(family = "mono"))

png("heatmap-cat1.png")
# ggplot objects are only drawn when printed. Top-level auto-printing does not
# happen under source(), so print explicitly to avoid writing an empty file.
print(heatmap_raw)
dev.off()

#' Build compact, aligned three-level labels from delimited strings
#'
#' Splits each distinct value of `x` into (at most) three fields on `sep`,
#' trims surrounding white space, blanks out category titles that merely
#' repeat the row above, pads every field to a common width, truncates each
#' field to `maxlen` characters and glues the fields back together with
#' `closer`.
#'
#' @param x Vector of labels; it is converted with `factor()`, so the rows are
#'   processed in factor-level (sorted) order.
#' @param sep Separator pattern handed to `stringr::str_split_fixed()`.
#' @param maxlen Maximum number of characters kept per field.
#' @param closer String placed between the three fields in the result.
#' @return A factor with the same values as `factor(x)` whose levels have been
#'   replaced by the formatted labels. Note that `levels<-` merges levels
#'   whose formatted labels happen to be identical.
cattrim <- function(x, sep="-", maxlen=20, closer=" ") {
  xf <- factor(x)

  # Nothing to format; also avoids max() over an empty vector below.
  if (nlevels(xf) == 0L) {
    return(xf)
  }

  # One row per level, one column per hierarchy field.
  parts <- str_split_fixed(levels(xf), sep, 3)
  n_rows <- nrow(parts)

  # Remove leading and trailing white space in every field.
  for (j in seq_len(ncol(parts))) {
    parts[, j] <- str_trim(parts[, j])
  }

  # Remove redundant category titles. Both comparisons are computed before
  # anything is blanked. A second-level title is redundant only when the
  # first level is unchanged as well; otherwise the first row of a new
  # top-level group (e.g. "2-A-x" right after "1-A-y") would lose its "A".
  same_top <- parts[-1, 1] == parts[-n_rows, 1]
  same_mid <- same_top & (parts[-1, 2] == parts[-n_rows, 2])
  parts[-1, 1][same_top] <- ""
  parts[-1, 2][same_mid] <- ""

  # Pad each field to the width of its longest entry so the columns line up
  # in a monospaced font, then cut it down to at most `maxlen` characters.
  for (j in seq_len(ncol(parts))) {
    field <- parts[, j]
    parts[, j] <- str_sub(format(field, width = max(nchar(field))), 1, maxlen)
  }

  levels(xf) <- apply(parts, 1, paste, collapse = closer)
  xf
}

# Heatmap with trimmed hierarchical labels on the y axis.
# Breaks and labels are paired by position in sorted order. A discrete y axis
# is drawn from the bottom up, so the limits are reversed to make the
# hierarchy read from top to bottom, with each blanked label sitting
# underneath the row that carries its category title.
lab_levels <- sort(unique(df2$lab))

heatmap_trimmed <- df2 %>%
  ggplot(aes(factor(cat), lab)) +
  geom_raster(aes(fill = n)) +
  theme_bw() +
  theme(axis.text.y = element_text(family = "mono")) +
  scale_y_discrete(
    limits = rev(lab_levels),
    breaks = lab_levels,
    # cattrim() returns a factor; the scale expects plain label strings.
    labels = as.character(cattrim(lab_levels, maxlen = 15))
  )

png("heatmap-cat2.png")
# Print explicitly so the plot is also rendered when the script is source()d.
print(heatmap_trimmed)
dev.off()

具有层次结构标签的热图（栅格）

0 个答案: