我想创建一个与 STRUCTURE 结果图(即用 distruct 程序绘制的那种图)完全相同的堆叠条形图。如何按公共因子对 X 轴标签进行分组,使每个因子只显示一次?例如,下面有来自 2 个群体的 6 个个体,我希望只显示 2 个标签,并且每个标签居中位于对应群体的下方。另外,有没有办法在每个组的周围画一个边框?
这是我目前的代码:
# Ancestry fractions: one row per individual (6 rows), one column per
# ancestry component (A1..A17).
df <- data.frame(
  A1  = c(0.000, 0.000, 0.020, 0.000, 0.000, 0.000),
  A2  = c(0.000, 0.000, 0.235, 0.195, 0.166, 0.205),
  A3  = c(0.065, 0.027, 0.000, 0.027, 0.000, 0.036),
  A4  = c(0.000, 0.000, 0.007, 0.011, 0.000, 0.000),
  A5  = c(0.000, 0.000, 0.000, 0.002, 0.028, 0.000),
  A6  = c(0.000, 0.041, 0.021, 0.068, 0.106, 0.105),
  A7  = c(0.093, 0.085, 0.001, 0.056, 0.110, 0.000),
  A8  = c(0.000, 0.000, 0.000, 0.000, 0.000, 0.029),
  A9  = c(0.000, 0.000, 0.058, 0.027, 0.096, 0.156),
  A10 = c(0.000, 0.023, 0.129, 0.012, 0.074, 0.117),
  A11 = c(0.000, 0.041, 0.000, 0.000, 0.000, 0.000),
  A12 = c(0.024, 0.000, 0.000, 0.000, 0.000, 0.000),
  A13 = c(0.817, 0.783, 0.527, 0.446, 0.258, 0.321),
  A14 = c(0.000, 0.000, 0.000, 0.006, 0.000, 0.000),
  A15 = c(0.000, 0.000, 0.000, 0.054, 0.143, 0.027),
  A16 = c(0.000, 0.000, 0.000, 0.000, 0.000, 0.003),
  A17 = c(0.000, 0.000, 0.000, 0.097, 0.019, 0.000)
)

# barplot() draws one stacked bar per matrix column, so the data frame is
# transposed to get one bar per individual. Every bar carries its own
# population label: "1" for the first two bars, "2" for the remaining four.
barplot(
  t(df),
  names.arg = rep(c("1", "2"), times = c(2, 4)),
  col = rainbow(ncol(df)),
  border = NA,
  space = 0.05,
  xlab = "Population",
  ylab = "Ancestry"
)
下面大致是我想要的效果:
如果可能的话,最好仍然可以设置 `space=` 参数,并且能够独立地指定各组之间黑色边框的宽度。
这是我引用的程序: http://pritchardlab.stanford.edu/structure.html
也许 ggplot 更适合做这件事?如果答案很明显,还请见谅,但我确实没能弄明白。
注意:我知道可以手动把 `names.arg` 中的某些值填成 `""`,但这对于庞大的数据集来说很费力,也不能很好地让标签居中,而且解决不了边框的问题。
答案 0 :(得分:2)
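这是使用 ggplot2 的解决方案。我为个体(subject)ID 和群体(population)ID 各添加了一列,并对数据做了 melt(从宽格式转换为长格式)。我用分面(facet)按群体对个体进行分组。遗憾的是,在撰写本回答时,ggplot2 无法把分面标签放在图的底部。对此,我只能建议用 Inkscape 或 Adobe Illustrator 编辑 pdf 文件。(注:ggplot2 2.0.0 及之后的版本可以通过 `facet_grid(..., switch = "x")` 把分面标签移到底部。)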
library(ggplot2)
library(reshape2)
# Add id and population label columns. Needed for melting and plotting.
df$population <- c("p1", "p1", "p2", "p2", "p2", "p2")
# Derive the ids from the row count instead of a hard-coded 1:6, and store
# them as a factor whose levels follow the row order. ggplot2 orders a
# character x axis alphabetically, which would place "id10" before "id2"
# as soon as there are more than nine individuals.
df$subject_id <- paste0("id", seq_len(nrow(df)))
df$subject_id <- factor(df$subject_id, levels = df$subject_id)

# Melt (reshape data from wide format to long format): one row per
# (individual, ancestry component) pair, with the value in `Fraction`.
mdat <- melt(
  df,
  id.vars = c("subject_id", "population"),
  variable.name = "Ancestry",
  value.name = "Fraction"
)

# Simple stacked bar plot: one bar per individual, one facet per population.
# Free scales and free space let each facet show only its own individuals,
# with facet width following the number of bars.
p <- ggplot(mdat, aes(x = subject_id, y = Fraction, fill = Ancestry)) +
  geom_bar(stat = "identity", position = "stack") +
  facet_grid(. ~ population, drop = TRUE, space = "free", scales = "free")
# Customized stacked bar plot:
# Sort ancestry order by overall 'abundance' of each ancestry. The first 17
# columns of `df` are the ancestry components; their column sums decide the
# factor level order, which in turn decides stacking and legend order.
mdat$Ancestry <- factor(
  mdat$Ancestry,
  levels = names(sort(colSums(df[, 1:17]), decreasing = TRUE))
)

# Colors taken from:
# https://github.com/mbostock/d3/wiki/Ordinal-Scales#category20
col17 <- c(
  "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
  "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
  "#aec7e8", "#ffbb78", "#98df8a", "#ff9896", "#c5b0d5",
  "#c49c94", "#c7c7c7"
)
# Name the palette by ancestry level so scale_fill_manual() matches colours
# by name rather than by position.
names(col17) <- levels(mdat$Ancestry)

# The deprecated `order` aesthetic is no longer mapped: stacking order follows
# the factor levels set above. `panel.margin.x` was renamed `panel.spacing.x`
# in ggplot2 2.2.0, and `unit` is exported from grid, so `grid::` suffices.
p2 <- ggplot(mdat, aes(x = subject_id, y = Fraction, fill = Ancestry)) +
  geom_bar(
    stat = "identity",
    position = "fill",
    width = 1,
    colour = "grey25"
  ) +
  facet_grid(. ~ population, drop = TRUE, space = "free", scales = "free") +
  theme(
    panel.grid = element_blank(),
    # Transparent panel with a dark outline: draws the box around each group.
    panel.background = element_rect(fill = NA, colour = "grey25"),
    # Horizontal gap between the population panels.
    panel.spacing.x = grid::unit(0.5, "lines"),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(size = 12),
    legend.key = element_rect(colour = "grey25")
  ) +
  # Zero expansion makes the bars fill the panel edge to edge.
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = col17) +
  guides(fill = guide_legend(override.aes = list(colour = NULL)))
library(gridExtra)
# Render both plots side by side into a 10 x 5 inch PNG at 100 dpi.
# The device is closed in `finally` so that an error while drawing does not
# leave the PNG device open and capturing every later plot.
png("bar_plots.png", width = 10, height = 5, units = "in", res = 100)
invisible(tryCatch(
  grid.arrange(p, p2, nrow = 1),
  finally = dev.off()
))