在ggplot2 facet plot中订购热图行

时间:2015-09-14 20:03:29

标签: r ggplot2 heatmap facet

我在ggplot2中遇到了多面热图渲染问题。我的想法是,我有几个元素(这些是现实生活中的基因)和几个实验(下面的例子中的F1和F2)。使用F1实验,我能够根据它们的平均表达式(高,......,中等,......,低)创建一类元素/基因。在下面通过示例生成的热图中,我想根据F1中的平均表达式值对每个类(01,02,03,04)中的每个元素进行排序。不幸的是,元素按字母顺序出现。我很乐意得到一些提示...... 最好

library(ggplot2)
library(reshape2)

set.seed(123)

# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
d <- matrix(round(runif(n.row * n.col),2), nc=n.col) 
colnames(d) <- sprintf("%02d", 1:5)

# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))


# let's create a data.frame d
d <- data.frame(d, 
                experiment = sort(rep(c("F1","F2"), n.row/2)),
                elements= elements)

# For elements related to experiment F1 
# we artificially produce a gradient of values that will
# create elements with increasing row means
d[d$experiment =="F1",1:5] <- round(sweep(d[d$experiment =="F1",1:5], 
                                          1, 
                                          seq(from=1, 10, length.out = 100), 
                                          "+"), 2)
# For elements related to experiment F2
# we artificially produce a gradient of values that will
# create elements with decreasing row means
d[d$experiment =="F2",1:5] <- round(sweep(d[d$experiment =="F2",1:5], 
                                          1, 
                                          seq(from=10, 1, length.out = 100), 
                                          "+"), 2)

#print(d[d$experiment =="F1",1:5])

# Now we split the dataset by experiments
d.split <- split(d, d$experiment)

# For all experiments, we order elements based on the mean expression signal in 
# F1.
row.means.F1 <- rowMeans(d.split$F1[,1:5])
pos <- order(row.means.F1)

for(s in names(d.split)){
  d.split[[s]] <- d.split[[s]][pos,]
}


# We create several classes of elements based on their 
# mean expression signal in F1.
cuts <- cut(1:nrow(d.split$F1), nb.experiment)
levels(cuts) <- sprintf("%02d", 1:nb.experiment)

for(s in names(d.split)){
  d.split[[s]] <- split(d.split[[s]], cuts)
}



# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "rowMeanClass", "exprs")


# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()                                        
p <- p + facet_wrap(~rowMeanClass +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))

ggsave("RPlot_test.jpeg", p)

example output http://oi57.tinypic.com/2cqfiwm.jpg

1 个答案:

答案 0 :(得分:0)

使用您的建议我能够找到一个解决方案(这意味着要清楚地指定&#39;元素因子的级别顺序)。谢谢hrbrmstr(和所有其他人)。

注意:我只添加了几行与原始代码相比,下面用&#39;添加:开始&#39;并且&#39;添加:结束&#39;标志。

library(ggplot2)
library(reshape2)

set.seed(123)

# let's create a fake dataset
nb.experiment <- 4
n.row <- 200
n.col <- 5
d <- matrix(round(runif(n.row * n.col),2), nc=n.col) 
colnames(d) <- sprintf("%02d", 1:5)

# These strings will be the row names of each heatmap
# in the subsequent facet plot
elements <- sample(replicate(n.row/2, rawToChar(as.raw(sample(65:90, 6, replace=T)))))


# let's create a data.frame d
d <- data.frame(d, 
                experiment = sort(rep(c("F1","F2"), n.row/2)),
                elements= elements)

# For elements related to experiment F1 
# we artificially produce a gradient of values that will
# create elements with increasing row means
d[d$experiment =="F1",1:5] <- round(sweep(d[d$experiment =="F1",1:5], 
                                          1, 
                                          seq(from=1, 10, length.out = 100), 
                                          "+"), 2)
# For elements related to experiment F2
# we artificially produce a gradient of values that will
# create elements with decreasing row means
d[d$experiment =="F2",1:5] <- round(sweep(d[d$experiment =="F2",1:5], 
                                          1, 
                                          seq(from=10, 1, length.out = 100), 
                                          "+"), 2)

#print(d[d$experiment =="F1",1:5])

# Now we split the dataset by experiments
d.split <- split(d, d$experiment)

# For all experiments, we order elements based on the mean expression signal in 
# F1.
row.means.F1 <- rowMeans(d.split$F1[,1:5])
pos <- order(row.means.F1)

for(s in names(d.split)){
  d.split[[s]] <- d.split[[s]][pos,]
}

## Added: begin ###
#Get the list of elements in proper order (based on row mean)
mean.order <- as.character(d.split$F1$elements)
## Added: end###

# We create several classes of elements based on their 
# mean expression signal in F1.
cuts <- cut(1:nrow(d.split$F1), nb.experiment)
levels(cuts) <- sprintf("%02d", 1:nb.experiment)

for(s in names(d.split)){
  d.split[[s]] <- split(d.split[[s]], cuts)
}



# Data are melt (their is perhaps a better solution...)
# in order to use the ggplot function
dm <- melt(do.call('rbind',lapply(d.split, melt)), id.var=c( "experiment", "elements", "variable", "L1"))
dm <- dm[, -5]
colnames(dm) <- c("experiment","elements", "pos", "rowMeanClass", "exprs")

## Added: begin###
#Ensure that dm$elements is an ordered factor with levels
# ordered as expected
dm$elements <- factor(dm$elements, levels = mean.order, ordered = TRUE)
## Added: end###

# Now we plot the data
p <- ggplot(dm, aes(x = pos, y = elements, fill = exprs))
p <- p + geom_raster()                                        
p <- p + facet_wrap(~rowMeanClass +experiment , scales = "free", ncol = 2)
p <- p + theme_bw()
p <- p + theme(text = element_text(size=4))
p <- p + theme(text = element_text(family = "mono", face = "bold"))

ggsave("RPlot_test.jpeg", p)