答案 0 :(得分:2)
第一次尝试答案 - 但需要更多工作才能使其真正起作用。具体而言,需要更仔细地考虑元素位置的对齐(以及它们的顺序)。
# library
library(ggplot2)
# create a dataset
specie=c(rep("sorgho" , 3) , rep("poacee" , 3) , rep("banana" , 3) , rep("triticum" , 3) )
condition=rep(c("normal" , "stress" , "Nitrogen") , 4)
value=abs(rnorm(12 , 0 , 15))
data=data.frame(specie,condition,value)
dend <- as.dendrogram(hclust(dist(with(data, tapply(value, specie, mean)))))
data$specie <- factor(data$specie, levels = labels(dend))
# Stacked Percent
library(dendextend)
p1 <- ggplot(dend, horiz = T)
p2 <- ggplot(data, aes(fill=condition, y=value, x=specie)) +
geom_bar( stat="identity", position="fill") + coord_flip()
library(cowplot)
plot_grid(p1, p2, align = "h")
答案 1 :(得分:0)
近三年后,仍然没有能够将堆叠的条形图与ggplot中的分层聚类结合在一起的软件包(至少我知道)。这是我基于该帖子joining a dendrogram and a heatmap的解决方案:
library(tidyverse)
library(phangorn)
library(vegan)
library(ggdendro)
library(dendextend)
library(ggsci)
library(cowplot)
## generate example data ####
set.seed(500)
combined_matrix <- data.frame(a=runif(14, 0, 33), b=runif(14, 0, 33), c=runif(14, 0, 33))
combined_matrix$d <- 100 - combined_matrix$a - combined_matrix$b - combined_matrix$c
row.names(combined_matrix) <- paste0("s", seq(1,14))
# vegan::vegdist() to calculate Bray-Curtis distance matrix
dm <- vegdist(combined_matrix, method = "bray")
# calculate UPGMA tree with phangorn::upgma() and convert to dendrogram
dendUPGMA <- as.dendrogram(upgma(dm))
plot_dendro_bars_v <- function(df, dend, taxonomy) {
#convert dendrogram to segment data
dend_data <- dendro_data(dend, type="rectangle")
segment_data <- dend_data[["segments"]]
#sample positions df
sample_pos_table <- with(dend_data$labels,
data.frame(x_center = x, sample = as.character(label), width = 0.9))
#prepare input data
ptdf <- rownames_to_column(df, var = "sample") %>%
pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
group_by(sample) %>%
mutate(Frequency = Frequency/100,
ymax = cumsum(Frequency/sum(Frequency)),
ymin = ymax - Frequency/sum(Frequency),
y_center = ymax-(Frequency/2)) %>%
left_join(sample_pos_table) %>%
mutate(xmin = x_center-width/2,
xmax = x_center+width/2)
#plot stacked bars
axis_limits <- with(sample_pos_table,
c(min(x_center - 0.5 * width), max(x_center + 0.5 * width))) +
0.1 * c(-1, 1) # extra spacing: 0.1
plt_hbars <- ggplot(ptdf,
aes_string(x = "x_center", y = "y_center", fill = taxonomy, xmin = "xmin", xmax = "xmax",
height = "Frequency", width = "width")) +
geom_tile() +
geom_rect(ymin = 0, ymax = 1, color = "black", fill = "transparent") +
scale_fill_rickandmorty() +
scale_y_continuous(expand = c(0, 0)) +
# For the y axis, alternatively set the labels as: gene_position_table$gene
scale_x_continuous(breaks = sample_pos_table[, "x_center"],
labels = sample_pos_table$sample,
limits = axis_limits,
expand = c(0, 0)) +
labs(x = "", y = "Frequency") +
theme_bw() +
theme(# margin: top, right, bottom, and left
plot.margin = unit(c(-0.9, 0.2, 1, 0.2), "cm"),
panel.grid.minor = element_blank())
#plot dendrogram
plt_dendr <- ggplot(segment_data) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
scale_y_continuous(expand = c(0, 0.05)) +
scale_x_continuous(breaks = sample_pos_table$x_center,
labels = rep("", nrow(sample_pos_table)),
limits = axis_limits,
expand = c(0, 0)) +
labs(x = "", y = "Distance", colour = "", size = "") +
theme_bw() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank())
#combine plots
comb <- plot_grid(plt_dendr, plt_hbars, align = 'v', ncol = 1, axis = "lr", rel_heights = c(0.3, 1))
comb
}
plot_dendro_bars_v(df = combined_matrix, dend = dendUPGMA, taxonomy = "example")
或水平
plot_dendro_bars_h <- function(df, dend, taxonomy) {
#convert dendrogram to segemnt data
dend_data <- dendro_data(dend, type="rectangle")
segment_data <- with(segment(dend_data),
data.frame(x = y, y = x, xend = yend, yend = xend))
#sample positions df
sample_pos_table <- with(dend_data$labels,
data.frame(y_center = x, sample = as.character(label), height = 0.9))
#prepare input data
ptdf <- rownames_to_column(df, var = "sample") %>%
pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
group_by(sample) %>%
mutate(Frequency = Frequency/100,
xmax = cumsum(Frequency/sum(Frequency)),
xmin = xmax - Frequency/sum(Frequency),
x_center = xmax-(Frequency/2)) %>%
left_join(sample_pos_table) %>%
mutate(ymin = y_center-height/2,
ymax = y_center+height/2)
#plot stacked bars
axis_limits <- with(sample_pos_table,
c(min(y_center - 0.5 * height), max(y_center + 0.5 * height))) +
0.1 * c(-1, 1) # extra spacing: 0.1
plt_hbars <- ggplot(ptdf,
aes_string(x = "x_center", y = "y_center", fill = taxonomy, ymin = "ymin", ymax = "ymax",
height = "height", width = "Frequency")) +
geom_tile() +
geom_rect(xmin = 0, xmax = 1, color = "black", fill = "transparent") +
scale_fill_rickandmorty() +
scale_x_continuous(expand = c(0, 0)) +
# For the y axis, alternatively set the labels as: gene_position_table$gene
scale_y_continuous(breaks = sample_pos_table[, "y_center"],
labels = rep("", nrow(sample_pos_table)),
limits = axis_limits,
expand = c(0, 0)) +
labs(x = "Frequency", y = "") +
theme_bw() +
theme(# margin: top, right, bottom, and left
plot.margin = unit(c(1, 0.2, 0.2, -0.9), "cm"),
panel.grid.minor = element_blank())
#plot dendrogram
plt_dendr <- ggplot(segment_data) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
scale_x_reverse(expand = c(0, 0.05)) +
scale_y_continuous(breaks = sample_pos_table$y_center,
labels = sample_pos_table$sample,
limits = axis_limits,
expand = c(0, 0)) +
labs(x = "Distance", y = "", colour = "", size = "") +
theme_bw() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank())
#combine plots
comb <- plot_grid(plt_dendr, plt_hbars, align = 'h', rel_widths = c(0.3, 1))
return(comb)
}
plot_dendro_bars_h(df = combined_matrix, dend = dendUPGMA, taxonomy = "example")
数据可以与可以强制生成树状图的任何树(例如UniFrac树)组合。玩得开心,罗马。
答案 2 :(得分:0)
这是我的 Roman_G 脚本版本。它在条形内显示百分比,并使用 vegan::reorder.hclust
对树状图的分支重新排序,以便第一列具有最高值的行倾向于放置在顶部,而具有最高值的行倾向于放置在最后一列往往放在底部。我还删除了额外的边距、刻度和轴。
library(tidyverse)
library(ggdendro)
library(vegan)
library(colorspace)
library(cowplot)
t=read.table(text="Spain_EN 0.028152 0.971828 0.000010 0.000010
Norway_Mesolithic 0.784705 0.083387 0.000010 0.131898
Russia_Sunghir4 0.000010 0.000010 0.999970 0.000010
Iran_Wezmeh_N 0.000010 0.492331 0.383227 0.124433
Russia_DevilsCave_N 0.000010 0.000010 0.000010 0.999970
Italy_North_Villabruna_HG 0.999970 0.000010 0.000010 0.000010
Russia_HG_Karelia 0.527887 0.133179 0.072342 0.266593
Russia_Yana_UP 0.000010 0.000014 0.999966 0.000010
Georgia_Kotias 0.000010 0.537322 0.381313 0.081355
China_SEastAsia_Island_EN 0.000010 0.000010 0.148652 0.851328
Turkey_N 0.000010 0.999970 0.000010 0.000010
USA_Ancient_Beringian 0.008591 0.000010 0.095008 0.896391
Russia_Sidelkino_HG 0.624076 0.045350 0.105615 0.224958
Russia_Kolyma_M 0.020197 0.000010 0.000010 0.979783
China_Tianyuan 0.000010 0.000010 0.423731 0.576249",row.names=1)
hc=hclust(dist(t),method="ward.D2")
hc=reorder(hc,wts=-as.matrix(t)%*%seq(ncol(t))^2) # vegan::reorder.hclust
tree=ggdendro::dendro_data(as.dendrogram(hc),type="rectangle")
p1=ggplot(ggdendro::segment(tree))+
geom_segment(aes(x=y,y=x,xend=yend,yend=xend),lineend="round",size=.4)+
scale_x_continuous(expand=expansion(add=c(0,.01)))+ # don't crop half of line between top-level nodes
scale_y_continuous(limits=.5+c(0,nrow(t)),expand=c(0,0))+
theme(
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.ticks.length=unit(0,"pt"), # remove extra space occupied by ticks
axis.title=element_blank(),
panel.background=element_rect(fill="white"),
panel.grid=element_blank(),
plot.margin=margin(5,5,5,0)
)
t=t[hc$labels[hc$order],]
t2=data.frame(V1=rownames(t)[row(t)],V2=colnames(t)[col(t)],V3=unname(do.call(c,t)))
lab=round(100*t2$V3)
lab[lab==0]=""
p2=ggplot(t2,aes(x=factor(V1,level=rownames(t)),y=V3,fill=V2))+
geom_bar(stat="identity",width=1,position=position_fill(reverse=T))+
geom_text(aes(label=lab),position=position_stack(vjust=.5,reverse=T),size=3.5)+
coord_flip()+
scale_x_discrete(expand=c(0,0))+
scale_y_discrete(expand=c(0,0))+
scale_fill_manual(values=colorspace::hex(HSV(head(seq(0,360,length.out=ncol(t)+1),-1),.5,1)))+
theme(
axis.text=element_text(color="black",size=11),
axis.text.x=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
legend.position="none",
plot.margin=margin(5,0,5,5)
)
cowplot::plot_grid(p2,p1,rel_widths=c(1,.4))
ggsave("a.png",height=.25*nrow(t),width=7)
还有来自 scale_x_dendrogram
的 scale_y_dendrogram
和 ggh4x
,它们使用 ggdendro::dendro_data
: https://teunbrand.github.io/ggh4x/articles/PositionGuides.html#dendrograms。但是我无法让它们使用 coord_flip
处理水平堆叠条。
library(ggh4x)
t=head(USArrests,20)
t2=data.frame(V1=rownames(t)[row(t)],V2=colnames(t)[col(t)],V3=unname(do.call(c,t)))
hc=hclust(dist(t))
ggplot(t2,aes(x=factor(V1,level=rownames(t)),y=V3,fill=V2))+
geom_bar(stat="identity",width=1,position=position_stack(reverse=F))+
geom_text(aes(label=round(V3)),position=position_stack(vjust=.5,reverse=F),size=3)+
scale_x_dendrogram(hclust=hc)+
scale_y_discrete(expand=c(0,0))+
# scale_fill_manual(values=colorspace::hex(HSV(head(seq(0,360,length.out=ncol(t)+1),-1),.5,1)))+
theme(
axis.text=element_text(color="black",size=11),
axis.text.x=element_text(angle=90,hjust=1,vjust=.5),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.ticks.length=unit(14,"pt"), # height of dendrogram
axis.title=element_blank(),
legend.justification=c(0,1),
legend.key=element_rect(fill=NA), # remove gray border around color squares
legend.margin=margin(-6,0,0,0),
legend.position=c(0,1),
legend.title=element_blank(),
panel.background=element_rect(fill="white"),
plot.margin=margin(5,0,5,5)
)
ggsave("a.png",height=6,width=6)
编辑:第三个选项是使用 circlize
: https://jokergoo.github.io/circlize/reference/circos.barplot.html。
library(circlize)
library(vegan) # for reorder.hclust (may be masked by `seriation`)
library(dendextend) # for color_branches
t=read.table(text="Kalmyk 0.119357 0.725057 0.000010 0.037803 0.117774
Kyrgyz_China 0.039367 0.512079 0.230150 0.095038 0.123366
Altaian_Chelkan 0.034095 0.000010 0.919478 0.000010 0.046407
Azeri 0.051638 0.004671 0.010727 0.902646 0.030318
Uzbek 0.102725 0.273261 0.001854 0.452126 0.170033
Salar 0.000010 0.539636 0.460334 0.000010 0.000010
Tatar_Kazan 0.113456 0.057026 0.000010 0.099336 0.730171
Tatar_Siberian 0.251376 0.221389 0.000010 0.077505 0.449721
Finnish 0.007214 0.000010 0.000010 0.015174 0.977592
Yakut 0.505434 0.473202 0.000010 0.002914 0.018440
Mansi 0.572791 0.000010 0.000010 0.000010 0.427179
Altaian 0.222424 0.335614 0.358801 0.032694 0.050468
Shor_Mountain 0.233984 0.000010 0.724596 0.000010 0.041400
Chuvash 0.180171 0.011056 0.000010 0.006462 0.802301
Enets 0.920409 0.000010 0.000010 0.000010 0.079561
Yukagir_Tundra 0.710359 0.289611 0.000010 0.000011 0.000010
Kyrgyz_Tajikistan 0.104000 0.563708 0.000010 0.125799 0.206483
Khakass_Kachin 0.254253 0.416760 0.174200 0.005262 0.149525
Tuvinian 0.448940 0.448899 0.000010 0.031803 0.070348
Besermyan 0.209841 0.001487 0.000010 0.000460 0.788202
Nogai_Astrakhan 0.062497 0.463590 0.000010 0.183203 0.290701
Todzin 0.725173 0.257670 0.000010 0.005836 0.011312
Kazakh 0.067027 0.518213 0.087979 0.114550 0.212231
Tofalar 0.815599 0.110299 0.000010 0.009693 0.064398
Karakalpak 0.009983 0.316964 0.389103 0.158275 0.125676
Estonian 0.000010 0.000010 0.000010 0.004409 0.995561
Dolgan 0.694025 0.255361 0.000010 0.049624 0.000979
Tatar_Siberian_Zabolotniye 0.521637 0.020132 0.000010 0.000010 0.458212
Uyghur 0.043578 0.486742 0.000010 0.318983 0.150687
Udmurt 0.256391 0.000010 0.001010 0.000010 0.742579
Evenk_FarEast 0.241328 0.606202 0.000010 0.000010 0.152451
Selkup 0.804662 0.000010 0.000010 0.000010 0.195308
Kumyk 0.060751 0.000112 0.000010 0.823905 0.115222
Hungarian 0.000010 0.000010 0.000010 0.244311 0.755659
Tubalar 0.159517 0.009457 0.802778 0.000010 0.028238
Turkmen 0.123631 0.226543 0.000010 0.529793 0.120023
Karelian 0.012854 0.000010 0.000010 0.000010 0.987116
Kazakh_China 0.074285 0.573009 0.152931 0.069362 0.130412
Mongol 0.033174 0.847004 0.025135 0.005178 0.089509
Daur 0.000010 0.995215 0.000010 0.000010 0.004755
Evenk_Transbaikal 0.611414 0.388556 0.000010 0.000010 0.000010
Nogai_Karachay_Cherkessia 0.119988 0.120774 0.000010 0.617261 0.141967
Veps 0.026887 0.000010 0.000010 0.000010 0.973083
Even 0.441349 0.278457 0.000010 0.015239 0.264946
Nganasan 0.999960 0.000010 0.000010 0.000010 0.000010
Bashkir 0.114088 0.056493 0.251488 0.030390 0.547542
Xibo 0.000010 0.985541 0.000010 0.000362 0.014077
Khakass 0.202707 0.171413 0.530905 0.007675 0.087300
Shor_Khakassia 0.258218 0.000010 0.694889 0.000010 0.046873
Nanai 0.105903 0.894067 0.000010 0.000010 0.000010
Buryat 0.064420 0.848458 0.017066 0.001696 0.068360
Yukagir_Forest 0.379416 0.096266 0.000010 0.003580 0.520728
Karachai 0.067138 0.004534 0.000010 0.798982 0.129336
Mordovian 0.022303 0.001193 0.000010 0.025251 0.951243
Turkish_Balikesir 0.092314 0.038550 0.000010 0.804964 0.064163
Turkish 0.040918 0.012255 0.000010 0.873179 0.073639
Kyrgyz_Kyrgyzstan 0.090129 0.607265 0.000010 0.122885 0.179711
Balkar 0.075115 0.000010 0.000010 0.829730 0.095136
Gagauz 0.000010 0.027887 0.015891 0.601619 0.354593
Nogai_Stavropol 0.070584 0.403817 0.000010 0.244701 0.280888
Negidal 0.248518 0.751452 0.000010 0.000010 0.000010
Tatar_Mishar 0.066112 0.037441 0.010377 0.138008 0.748062",row.names=1)
hc=hclust(dist(t))
hc=reorder(hc,-(t[,1]+t[,2]-t[,4]-2*t[,5]))
labelcolor=hcl(c(260,90,120,60,0,210,180,310)+15,60,70)
barcolor=hcl(c(310,260,120,60,210)+15,60,70)
labels=hc$labels[hc$order]
cut=cutree(hc,8)
dend=color_branches(as.dendrogram(hc),k=length(unique(cut)),col=labelcolor[unique(cut[labels])])
t=t[hc$labels[hc$order],]
circos.clear()
png("a.png",w=2500,h=2500,res=300)
circos.par(cell.padding=c(0,0,0,0),gap.degree=5,points.overflow.warning=F)
circos.initialize("a",xlim=c(0,nrow(t)))
circos.track(ylim=c(0,1),bg.border=NA,track.height=.28,track.margin=c(.01,0),
panel.fun=function(x,y)for(i in 1:nrow(t))circos.text(i-.5,0,labels[i],adj=c(0,.5),facing="clockwise",niceFacing=T,cex=.65,col=labelcolor[cut[labels[i]]]))
circos.track(ylim=c(0,1),track.margin=c(0,0),track.height=.35,bg.lty=0,panel.fun=function(x,y){
mat=as.matrix(t)
pos=1:nrow(mat)-.5
barwidth=1
for(i in 1:ncol(mat)){
seq1=rowSums(mat[,seq(i-1),drop=F])
seq2=rowSums(mat[,seq(i),drop=F])
circos.rect(pos-barwidth/2,if(i==1){0}else{seq1},pos+barwidth/2,seq2,col=barcolor[i],border="gray20",lwd=.1)
}
for(i in 1:ncol(mat)){
seq1=rowSums(mat[,seq(i-1),drop=F])
seq2=rowSums(mat[,seq(i),drop=F])
lab=round(100*t[,i])
lab[lab<=1]=""
circos.text(pos,if(i==1){seq1/2}else{seq1+(seq2-seq1)/2},labels=lab,col="gray10",cex=.4,facing="downward")
}
})
circos.track(ylim=c(0,attr(dend,"height")),bg.border=NA,track.margin=c(0,.0015),track.height=.35,panel.fun=function(x,y)circos.dendrogram(dend))
circos.clear()
dev.off()