我正在使用R的dendextend包,绘制由hclust {stats}的每一种聚类方法生成的hclust树对象:"ward.D"、"ward.D2"、"single"、"complete"、"average"(= UPGMA)、"mcquitty"(= WPGMA)、"median"(= WPGMC)或"centroid"(= UPGMC)。
我注意到当使用method =“median”或“centroid”时,color_branches的颜色编码失败。
我用随机生成的矩阵进行了测试,使用"median"和"centroid"方法时该错误同样可以重现。这是否有特定的原因?
请参阅输出图的链接: fig1. hclust methods (a) ward.D2, (b) median, (c) centroid
# Reproducible example: colour every leaf's branch after its label, once per
# hclust linkage method, to show where color_branches() stops working.
library(dendextend)

set.seed(1)
df <- as.data.frame(replicate(10, rnorm(20)))
# Ten columns named after five colours, each colour used twice; the colour
# vector is the same as the name vector.
df.names <- rep(c("black", "red", "blue", "green", "cyan"), times = 2)
df.col <- df.names
colnames(df) <- df.names
# dist() compares rows, so transpose to get distances between columns.
df.dist <- dist(t(df), method = "euclidean")

# Cluster `d` with the given linkage and colour the leaf labels in leaf order.
labelled_dend <- function(d, linkage, leaf_names, leaf_cols) {
  tree <- as.dendrogram(hclust(d, method = linkage), labels = leaf_names)
  labels_colors(tree) <- leaf_cols[order.dendrogram(tree)]
  tree
}

# Draw a dendrogram horizontally with thick branches.
plot_thick <- function(tree) {
  plot(set(tree, "branches_lwd", 3), horiz = TRUE)
}

# plotting works for "ward.D", "ward.D2", "single", "complete", "average", "mcquitty"
dend <- labelled_dend(df.dist, "ward.D2", df.names, df.col)
dend.colorBranch <- color_branches(
  dend,
  k = length(df.names),
  col = df.col[order.dendrogram(dend)]
)
plot_thick(dend.colorBranch)

# color_branches fails for "median" or "centroid"
# NOTE(review): the hclust documentation says these two linkages can produce
# non-monotone trees (inversions); presumably that is what breaks the k-based
# cut used here -- confirm.
dend <- labelled_dend(df.dist, "median", df.names, df.col)
dend.colorBranch <- color_branches(
  dend,
  k = length(df.names),
  col = df.col[order.dendrogram(dend)]
)
plot_thick(dend.colorBranch)

dend <- labelled_dend(df.dist, "centroid", df.names, df.col)
dend.colorBranch <- color_branches(
  dend,
  k = length(df.names),
  col = df.col[order.dendrogram(dend)]
)
plot_thick(dend.colorBranch)
我正在使用dendextend_1.4.0。会话信息(sessionInfo)如下:
sessionInfo()
R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.3
感谢。
答案 0 :(得分:1)
您可以使用branches_attr_by_clusters解决此问题(不过用起来可能有点棘手,请参阅下面的示例):
# Workaround example: colour branches with branches_attr_by_clusters() for a
# linkage method where color_branches() does not give the expected result.
library(dendextend)
set.seed(1)
df <- as.data.frame(replicate(10, rnorm(20)))
# Ten columns named after five colours, each colour used twice.
df.names <- rep(c("black", "red", "blue", "green", "cyan"), 2)
df.col <- rep(c("black", "red", "blue", "green", "cyan"), 2)
colnames(df) <- df.names
# dist() compares rows, so transpose to get distances between columns.
df.dist <- dist(t(df), method = "euclidean")

# plotting works for "ward.D", "ward.D2", "single", "complete", "average", "mcquitty"
dend <- as.dendrogram(hclust(df.dist, method = "ward.D2"), labels = df.names)
labels_colors(dend) <- df.col[order.dendrogram(dend)]
dend.colorBranch <- color_branches(
  dend,
  k = length(df.names),
  col = df.col[order.dendrogram(dend)]
)
dend.colorBranch %>%
  set("branches_lwd", 3) %>%
  plot(horiz = TRUE)

# color_branches fails for "median" or "centroid"
dend <- as.dendrogram(hclust(df.dist, method = "median"), labels = df.names)
# Leaf colours in the order the leaves appear in the dendrogram.
aa <- df.col[order.dendrogram(dend)]
labels_colors(dend) <- aa
# Kept from the question: this plot still shows the failing colouring.
dend.colorBranch <- color_branches(
  dend,
  k = length(df.names),
  col = df.col[order.dendrogram(dend)]
)
dend.colorBranch %>%
  set("branches_lwd", 3) %>%
  plot(horiz = TRUE)

# Workaround: treat each colour as a cluster id. Levels follow first
# appearance along the leaves, so levels(aa) supplies one colour per cluster
# in that same order.
aa <- factor(aa, levels = unique(aa))
# `values` is spelled out in full; the original `value =` only worked through
# partial argument matching.
dend %>%
  branches_attr_by_clusters(aa, values = levels(aa)) %>%
  plot()