与ggplotly()结合使用时,ticktext值不能解决ggplot2 facet_grid()分解的问题

时间:2018-07-25 13:03:13

标签: r ggplot2 ggplotly

我有一个数据框:

gene_symbol<-c("DADA","SDAASD","SADDSD","SDADD","ASDAD","XCVXCVX","EQWESDA","DASDADS","SDASDASD","DADADASD","sdaadfd","DFSD","SADADDAD","SADDADADA","DADSADSASDWQ","SDADASDAD","ASD","DSADD")
panel<-c("growth","growth","growth","growth","big","big","big","small","small","dfgh","DF","DF","DF","DF","DF","gh","DF","DF")
ASDDA<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf2<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf3<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf4<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf5<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDA1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf11<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf21<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf31<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf41<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf51<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
Gene_states22 <- data.frame(gene_symbol, panel, ASDDA, ASDDb, ASDDAf, ASDDAf1, ASDDAf2, 
                            ASDDAf3, ASDDAf4, ASDDAf5, ASDDA1, ASDDb1, ASDDAf1, ASDDAf11,
                            ASDDAf21, ASDDAf31, ASDDAf41, ASDDAf51)

然后我创建一个热图:

library(ggplot2); library(reshape2)
HG3 <- split(Gene_states22[,1:15], Gene_states22$panel)
HG4 <- melt(HG3, id.vars= c("gene_symbol","panel"))
HG4 <- HG4[,-5]
pp <- ggplot(HG4, aes(gene_symbol,variable)) + 
  geom_tile(aes(fill = value),
            colour = "grey50") + 
  facet_grid(~panel, scales = "free" ,space = "free") +
  scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown"))

enter image description here 如您所见,我使用facet_grid根据面板值将热图分成几组。问题是,当我使用ggplotly(pp)时,列宽度因组而异,并且我的图看起来很丑。

enter image description here

为了解决该问题,我使用了Plotly and ggplot with facet_grid in R: How to to get yaxis labels to use ticktext value instead of range value? 的改编答案:

library(plotly)
library(ggplot2)
library(data.table)
library(datasets) 


#add fake model for use in facet
dt<-data.table(HG4[1:50,])
dt[,variable:=rownames(HG4)]
dt[,panel:=substr(variable,1,regexpr(" ",variable)-1)][panel=="",panel:=variable]

ggplot.test<-ggplot(dt,aes(gene_symbol,variable))+facet_grid(panel~.,scales="free_y",space="free",drop=TRUE)+
  geom_tile(aes(fill = value),
            colour = "grey50") + 
  scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown")) +
  labs(title = "Heatmap", x = "gene_symbol", y = "sample", fill = "value") +
  guides(fill = FALSE)+
  theme(panel.background = element_rect(fill = NA),
        panel.spacing = unit(0.5, "lines"), ## It was here where you had a 0 for distance between facets. I replaced it by 0.5 .
        strip.placement = "outside")



p <- ggplotly(ggplot.test)
len <- length(unique(HG4$panel))


total <- 1
for (i in 2:len) {
  total <- total + length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']])
}

spacer <- 0.01 #space between the horizontal plots
total_length = total + len * spacer
end <- 1
start <- 1

for (i in c('', seq(2, len))) {
  tick_l <- length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']]) + 1

  #fix the y-axis
  p[['x']][['layout']][[paste('yaxis', i, sep='')]][['tickvals']] <- seq(1, tick_l)
  p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']][[tick_l]] <- ''

  end <- start - spacer
  start <- start - (tick_l - 1) / total_length
  v <- c(start, end)
  #fix the size
  p[['x']][['layout']][[paste('yaxis', i, sep='')]]$domain <- v
}

p[['x']][['layout']][['annotations']][[3]][['y']] <- (p[['x']][['layout']][['yaxis']]$domain[2] + p[['x']][['layout']][['yaxis']]$domain[1]) /2
p[['x']][['layout']][['shapes']][[2]][['y0']] <- p[['x']][['layout']][['yaxis']]$domain[1]
p[['x']][['layout']][['shapes']][[2]][['y1']] <- p[['x']][['layout']][['yaxis']]$domain[2]

#fix the annotations
for (i in 3:len + 1) {
  #fix the y position
  p[['x']][['layout']][['annotations']][[i]][['y']] <- (p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[1] + p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[2]) /2
  #trim the text
  p[['x']][['layout']][['annotations']][[i]][['text']] <- substr(p[['x']][['layout']][['annotations']][[i]][['text']], 1, length(p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]][['ticktext']]) * 3 - 3)
}

#fix the rectangle shapes in the background
for (i in seq(0,(len - 2) * 2, 2)) {
  p[['x']][['layout']][['shapes']][[i+4]][['y0']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[1]
  p[['x']][['layout']][['shapes']][[i+4]][['y1']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[2]
}
p

但是热图仍然不正确: enter image description here

1 个答案:

答案 0 :(得分:1)

所以首先要注意的是:

就您而言,我什至不确定plotly热图是否是您所需要的。另外,您应该从不将复杂的ggplot转换为可打印。它将失败!在90%的情况下。尝试在plotly或您希望结束的任何地方重新创建图。其他任何事情最终都会导致编码地狱。

我从做一些研究开始:

  1. Here很好地说明了如何在plotly中创建具有不同颜色的热图
  2. This说明了如何在子图中创建标题。

从帖子1开始,我知道我必须为您数据中的每个级别创建一个矩阵。所以我为此写了一个函数:

mymat<-as.matrix(Gene_states22[,-1:-2])

### Creates a 1-NA dummy matrix for each level. The output is stored in a list
dummy_mat<-function(mat,levels,names_col){
  mat_list<-lapply(levels,function(x){
                            mat[mat!=x]=NA
                            mat[mat==x]=1
                            mymat=t(apply(mat,2,as.numeric))
                            colnames(mymat)=names_col
                            return(mymat)
                            })
  names(mat_list)=levels
  return(mat_list)
}
my_mat_list<-dummy_mat(mymat,c('DF','low','normal','over'),Gene_states22$gene_symbol)

### Optional: The heatmap type is peculiar - I created a text-NA matrix for each category as well
text_mat<-function(mat,levels,names_col){
  mat_list<-lapply(levels,function(x){
                            mat[mat!=x]=NA
                            mat=t(mat)
                            colnames(mat)=names_col
                            return(mat)
                            })
  names(mat_list)=levels
  return(mat_list)
}
   my_mat_list_t<-text_mat(mymat,c('DF','low','normal','over'),as.character(Gene_states22$gene_symbol))

此外,我需要每个级别的颜色。这些颜色是使用某些数据框创建的。您也可以在此处编写类似的(lapply-)循环:

DF_Color <- data.frame(x = c(0,1), y = c("#DEDEDE", "#DEDEDE"))
colnames(DF_Color) <- NULL

lowColor <- data.frame(x = c(0,1), y = c("#00CCFF", "#00CCFF"))
colnames(lowColor) <- NULL

normColor <- data.frame(x = c(0,1), y = c("#DEDE00", "#DEDE00"))
colnames(normColor) <- NULL

overColor <- data.frame(x = c(0,1), y = c("#DE3333", "#DE3333"))
colnames(overColor) <- NULL

此外,我们需要每个面板类别的矩阵列:

mycols<-lapply(levels(Gene_states22$panel),function(x) grep(x,Gene_states22$panel))

我也将其存储在列表中。 接下来,我使用lapply循环进行绘制。我将值存储在列表中,并使用subplot将所有内容放在一起:

library(plotly)

p_list<-lapply(1:length(mycols),function(j){
  columns<-mycols[[j]]

p<-plot_ly(
    type = "heatmap"
) %>% add_trace(
    y=rownames(my_mat_list$DF),x=colnames(my_mat_list$DF)[columns],
    z = my_mat_list$DF[,columns],
    xgap=3,ygap=3, text=my_mat_list_t$DF[,columns],hoverinfo="x+y+text",
    colorscale = DF_Color,
    colorbar = list(
        len = 0.3,
        y = 0.3,
        yanchor = 'top',
        title = 'DF series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$low),x=colnames(my_mat_list$low)[columns],
    z = my_mat_list$low[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$low[,columns],hoverinfo="x+y+text",
    colorscale = lowColor,
    colorbar = list(
        len = 0.3,
        y = 0.3,
        yanchor = 'top',
        title = 'low series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$normal),x=colnames(my_mat_list$normal)[columns],
    z = my_mat_list$normal[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$normal[,columns],hoverinfo="x+y+text",
    colorscale = normColor,
    colorbar = list(
        len = 0.3,
        y = 1,
        yanchor = 'top',
        title = 'normal series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$over),x=colnames(my_mat_list$over)[columns],
    z = my_mat_list$over[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$over[,columns],hoverinfo="x+y+text",
    colorscale = overColor,
    colorbar = list(
        len = 0.3,
        y = 1,
        yanchor = 'top',
        title = 'over series',
        tickvals = ''
    )
 )
return(p)
})

subplot(p_list[[1]],p_list[[2]],shareY = TRUE) %>%
  layout(annotations = list(
 list(x = 0.2 , y = 1.05, text = levels(Gene_states22$panel)[1], showarrow = F, xref='paper', yref='paper'),
  list(x = 0.8 , y = 1.05, text = levels(Gene_states22$panel)[2], showarrow = F, xref='paper', yref='paper'))
)

enter image description here

可能的问题

  1. 您必须围绕dfgh之类的类别进行创建,这些类别只能出现一次。如果在R中仅选择一列,则输出将自动转换为(数字或字符)vector类型。因此,也许将as.matrix()添加到所有ztext自变量
  2. 悬停文本实际上并不起作用。但是plotly有很好的文档there。您应该能够弄清楚。
  3. 还必须在subplot函数中指定宽度。如果您有10个以上的类别,那就太奇怪了。
  4. 交互性并没有真正起作用。您无法删除痕迹。为什么?不知道。如果需要,请做一些研究。我想这与情节类型有关。
  5. 我建议以px为单位指定绘图的扩展。这可能会使磁贴更加相似。
  6. 最后,您将需要一些reference for the (subplot) titles,并且需要调整地块的边距。这样标题就可见了。