我有一个数据框:
gene_symbol<-c("DADA","SDAASD","SADDSD","SDADD","ASDAD","XCVXCVX","EQWESDA","DASDADS","SDASDASD","DADADASD","sdaadfd","DFSD","SADADDAD","SADDADADA","DADSADSASDWQ","SDADASDAD","ASD","DSADD")
panel<-c("growth","growth","growth","growth","big","big","big","small","small","dfgh","DF","DF","DF","DF","DF","gh","DF","DF")
ASDDA<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf2<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf3<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf4<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf5<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDA1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf11<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf21<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf31<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf41<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf51<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
Gene_states22 <- data.frame(gene_symbol, panel, ASDDA, ASDDb, ASDDAf, ASDDAf1, ASDDAf2,
ASDDAf3, ASDDAf4, ASDDAf5, ASDDA1, ASDDb1, ASDDAf1, ASDDAf11,
ASDDAf21, ASDDAf31, ASDDAf41, ASDDAf51)
然后我创建一个热图:
library(ggplot2); library(reshape2)
HG3 <- split(Gene_states22[,1:15], Gene_states22$panel)
HG4 <- melt(HG3, id.vars= c("gene_symbol","panel"))
HG4 <- HG4[,-5]
pp <- ggplot(HG4, aes(gene_symbol,variable)) +
geom_tile(aes(fill = value),
colour = "grey50") +
facet_grid(~panel, scales = "free" ,space = "free") +
scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown"))
如您所见,我使用facet_grid根据面板值将热图分成几组。问题是,当我使用ggplotly(pp)
时,列宽度因组而异,并且我的图看起来很丑。
为了解决该问题,我使用了Plotly and ggplot with facet_grid in R: How to to get yaxis labels to use ticktext value instead of range value? 的改编答案:
library(plotly)
library(ggplot2)
library(data.table)
library(datasets)
#add fake model for use in facet
dt<-data.table(HG4[1:50,])
dt[,variable:=rownames(HG4)]
dt[,panel:=substr(variable,1,regexpr(" ",variable)-1)][panel=="",panel:=variable]
ggplot.test<-ggplot(dt,aes(gene_symbol,variable))+facet_grid(panel~.,scales="free_y",space="free",drop=TRUE)+
geom_tile(aes(fill = value),
colour = "grey50") +
scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown")) +
labs(title = "Heatmap", x = "gene_symbol", y = "sample", fill = "value") +
guides(fill = FALSE)+
theme(panel.background = element_rect(fill = NA),
panel.spacing = unit(0.5, "lines"), ## It was here where you had a 0 for distance between facets. I replaced it by 0.5 .
strip.placement = "outside")
p <- ggplotly(ggplot.test)
len <- length(unique(HG4$panel))
total <- 1
for (i in 2:len) {
total <- total + length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']])
}
spacer <- 0.01 #space between the horizontal plots
total_length = total + len * spacer
end <- 1
start <- 1
for (i in c('', seq(2, len))) {
tick_l <- length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']]) + 1
#fix the y-axis
p[['x']][['layout']][[paste('yaxis', i, sep='')]][['tickvals']] <- seq(1, tick_l)
p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']][[tick_l]] <- ''
end <- start - spacer
start <- start - (tick_l - 1) / total_length
v <- c(start, end)
#fix the size
p[['x']][['layout']][[paste('yaxis', i, sep='')]]$domain <- v
}
p[['x']][['layout']][['annotations']][[3]][['y']] <- (p[['x']][['layout']][['yaxis']]$domain[2] + p[['x']][['layout']][['yaxis']]$domain[1]) /2
p[['x']][['layout']][['shapes']][[2]][['y0']] <- p[['x']][['layout']][['yaxis']]$domain[1]
p[['x']][['layout']][['shapes']][[2]][['y1']] <- p[['x']][['layout']][['yaxis']]$domain[2]
#fix the annotations
for (i in 3:len + 1) {
#fix the y position
p[['x']][['layout']][['annotations']][[i]][['y']] <- (p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[1] + p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[2]) /2
#trim the text
p[['x']][['layout']][['annotations']][[i]][['text']] <- substr(p[['x']][['layout']][['annotations']][[i]][['text']], 1, length(p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]][['ticktext']]) * 3 - 3)
}
#fix the rectangle shapes in the background
for (i in seq(0,(len - 2) * 2, 2)) {
p[['x']][['layout']][['shapes']][[i+4]][['y0']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[1]
p[['x']][['layout']][['shapes']][[i+4]][['y1']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[2]
}
p
答案 0 :(得分:1)
所以首先要注意的是:
就您而言,我什至不确定plotly
热图是否是您所需要的。另外,您应该从不将复杂的ggplot
转换为可打印。它将失败!在90%的情况下。尝试在plotly
或您希望结束的任何地方重新创建图。其他任何事情最终都会导致编码地狱。
我从做一些研究开始:
从帖子1开始,我知道我必须为您数据中的每个级别创建一个矩阵。所以我为此写了一个函数:
mymat<-as.matrix(Gene_states22[,-1:-2])
### Creates a 1-NA dummy matrix for each level. The output is stored in a list
dummy_mat<-function(mat,levels,names_col){
mat_list<-lapply(levels,function(x){
mat[mat!=x]=NA
mat[mat==x]=1
mymat=t(apply(mat,2,as.numeric))
colnames(mymat)=names_col
return(mymat)
})
names(mat_list)=levels
return(mat_list)
}
my_mat_list<-dummy_mat(mymat,c('DF','low','normal','over'),Gene_states22$gene_symbol)
### Optional: The heatmap type is peculiar - I created a text-NA matrix for each category as well
text_mat<-function(mat,levels,names_col){
mat_list<-lapply(levels,function(x){
mat[mat!=x]=NA
mat=t(mat)
colnames(mat)=names_col
return(mat)
})
names(mat_list)=levels
return(mat_list)
}
my_mat_list_t<-text_mat(mymat,c('DF','low','normal','over'),as.character(Gene_states22$gene_symbol))
此外,我需要每个级别的颜色。这些颜色是使用某些数据框创建的。您也可以在此处编写类似的(lapply
-)循环:
DF_Color <- data.frame(x = c(0,1), y = c("#DEDEDE", "#DEDEDE"))
colnames(DF_Color) <- NULL
lowColor <- data.frame(x = c(0,1), y = c("#00CCFF", "#00CCFF"))
colnames(lowColor) <- NULL
normColor <- data.frame(x = c(0,1), y = c("#DEDE00", "#DEDE00"))
colnames(normColor) <- NULL
overColor <- data.frame(x = c(0,1), y = c("#DE3333", "#DE3333"))
colnames(overColor) <- NULL
此外,我们需要每个面板类别的矩阵列:
mycols<-lapply(levels(Gene_states22$panel),function(x) grep(x,Gene_states22$panel))
我也将其存储在列表中。
接下来,我使用lapply循环进行绘制。我将值存储在列表中,并使用subplot
将所有内容放在一起:
library(plotly)
p_list<-lapply(1:length(mycols),function(j){
columns<-mycols[[j]]
p<-plot_ly(
type = "heatmap"
) %>% add_trace(
y=rownames(my_mat_list$DF),x=colnames(my_mat_list$DF)[columns],
z = my_mat_list$DF[,columns],
xgap=3,ygap=3, text=my_mat_list_t$DF[,columns],hoverinfo="x+y+text",
colorscale = DF_Color,
colorbar = list(
len = 0.3,
y = 0.3,
yanchor = 'top',
title = 'DF series',
tickvals = ''
)
) %>% add_trace(
y=rownames(my_mat_list$low),x=colnames(my_mat_list$low)[columns],
z = my_mat_list$low[,columns],
xgap=3,ygap=3,text=my_mat_list_t$low[,columns],hoverinfo="x+y+text",
colorscale = lowColor,
colorbar = list(
len = 0.3,
y = 0.3,
yanchor = 'top',
title = 'low series',
tickvals = ''
)
) %>% add_trace(
y=rownames(my_mat_list$normal),x=colnames(my_mat_list$normal)[columns],
z = my_mat_list$normal[,columns],
xgap=3,ygap=3,text=my_mat_list_t$normal[,columns],hoverinfo="x+y+text",
colorscale = normColor,
colorbar = list(
len = 0.3,
y = 1,
yanchor = 'top',
title = 'normal series',
tickvals = ''
)
) %>% add_trace(
y=rownames(my_mat_list$over),x=colnames(my_mat_list$over)[columns],
z = my_mat_list$over[,columns],
xgap=3,ygap=3,text=my_mat_list_t$over[,columns],hoverinfo="x+y+text",
colorscale = overColor,
colorbar = list(
len = 0.3,
y = 1,
yanchor = 'top',
title = 'over series',
tickvals = ''
)
)
return(p)
})
subplot(p_list[[1]],p_list[[2]],shareY = TRUE) %>%
layout(annotations = list(
list(x = 0.2 , y = 1.05, text = levels(Gene_states22$panel)[1], showarrow = F, xref='paper', yref='paper'),
list(x = 0.8 , y = 1.05, text = levels(Gene_states22$panel)[2], showarrow = F, xref='paper', yref='paper'))
)
可能的问题:
dfgh
之类的类别进行创建,这些类别只能出现一次。如果在R
中仅选择一列,则输出将自动转换为(数字或字符)vector
类型。因此,也许将as.matrix()
添加到所有z
和text
自变量plotly
有很好的文档there。您应该能够弄清楚。 subplot
函数中指定宽度。如果您有10个以上的类别,那就太奇怪了。