我有一个数据框“A”,其中行是样本,列是基因,单元格中的数值为基因表达值(RPKM)。
Gene1 Gene2 Gene3 Gene4 Gene5 Gene6
Sample1 0.02 0.038798682 0.1423662 2.778587067 0.471403939 18.93687655
Sample2 0 0.059227225 0.208765213 0.818810739 0.353671882 1.379027685
Sample3 0 0.052116384 0.230437735 2.535040249 0.504061015 9.773089223
Sample4 0.06 0.199264618 0.261100548 2.516963635 0.63659138 11.01441624
Sample5 0 0.123521916 0.273330986 2.751309388 0.623572499 34.0563519
Sample6 0 0.128767634 0.263491811 2.882878373 0.359322715 13.02402045
Sample7 0 0.080097356 0.234511372 3.568192768 0.386217698 9.068928569
Sample8 0 0.017421323 0.247775683 5.109428797 0.068760572 15.7490551
Sample9 0 2.10281137 0.401582013 8.202902242 0.140596724 60.25989178
要绘制散点图以显示Gene1与Gene5和Gene6之间的相关性,我使用了以下代码:
library(tidyr)
library(ggplot2)
# Scatter plots of Gene1 against Gene5 and Gene6 (one facet per target gene),
# each with a linear fit, written to "Gene1.pdf".
#
# NOTE: both axes use a log2 transform, so zero expression values (Gene1 is 0
# in most samples of the example data) become -Inf and are dropped by ggplot2
# with a warning; add a pseudocount beforehand if those samples must be shown.
gene1_plot <- A %>%
  # Long format: one row per (sample, target gene) pair, so the plot can be
  # faceted on the target gene.
  pivot_longer(
    cols = Gene5:Gene6,
    names_to = "variable",
    values_to = "values"
  ) %>%
  ggplot(aes(Gene1, values)) +
  geom_point() +
  facet_grid(. ~ variable, scales = "free_x") +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(trans = "log2", labels = NULL, breaks = NULL) +
  scale_x_continuous(trans = "log2", labels = NULL, breaks = NULL)

pdf("Gene1.pdf")
# Print explicitly: ggplot objects are only auto-printed at the interactive
# top level, so without print() the PDF is empty when this script is run via
# source() or the code is moved into a function or loop.
print(gene1_plot)
dev.off()
我还想为以下组合绘制同样的图:
Gene2 vs Gene5 and Gene6
Gene3 vs Gene5 and Gene6
Gene4 vs Gene5 and Gene6
当然,我可以手动把代码中的Gene1替换成其他基因来逐个绘制。
但是,与其手动替换,我更想写一个循环,依次绘制Gene1到Gene4分别针对Gene5和Gene6的图,并把每个图单独保存为以相应基因名命名的pdf文件。
更新:根据要求,下面是dput(A)的输出:
# dput() representation of the example data frame: 9 rows named
# Sample1..Sample9 and 6 numeric columns named Gene1..Gene6.
# Assign this expression to `A` to reproduce the plotting examples.
structure(list(Gene1 = c(0.02, 0, 0, 0.06, 0, 0, 0, 0, 0), Gene2 = c(0.038798682,
0.059227225, 0.052116384, 0.199264618, 0.123521916, 0.128767634,
0.080097356, 0.017421323, 2.10281137), Gene3 = c(0.1423662, 0.208765213,
0.230437735, 0.261100548, 0.273330986, 0.263491811, 0.234511372,
0.247775683, 0.401582013), Gene4 = c(2.778587067, 0.818810739,
2.535040249, 2.516963635, 2.751309388, 2.882878373, 3.568192768,
5.109428797, 8.202902242), Gene5 = c(0.471403939, 0.353671882,
0.504061015, 0.63659138, 0.623572499, 0.359322715, 0.386217698,
0.068760572, 0.140596724), Gene6 = c(18.93687655, 1.379027685,
9.773089223, 11.01441624, 34.0563519, 13.02402045, 9.068928569,
15.7490551, 60.25989178)), class = "data.frame", row.names = c("Sample1",
"Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7",
"Sample8", "Sample9"))
答案 0 :(得分:1)
用循环能满足您的需求吗?我尚未测试(因为您没有提供dput()输出的数据),因此代码可能还需要稍作调整。
# Plot every gene column of `A` except the two fixed targets (Gene5, Gene6)
# against those targets and save each figure to "<gene>.pdf" in the working
# directory.
#
# NOTE: both axes are log2-transformed, so zero expression values become -Inf
# and are dropped by ggplot2 with a warning.
cols <- setdiff(colnames(A), c("Gene5", "Gene6"))

for (i in cols) {
  name <- paste0(i, ".pdf")

  # Refer to the current gene by name through the `.data` pronoun instead of
  # copying it into a helper column: `A` is left unmodified and no dplyr
  # function (select) is needed, only the attached tidyr and ggplot2.
  p <- A %>%
    pivot_longer(
      cols = Gene5:Gene6,
      names_to = "variable",
      values_to = "values"
    ) %>%
    ggplot(aes(.data[[i]], values)) +
    geom_point() +
    facet_grid(. ~ variable, scales = "free_x") +
    geom_smooth(method = "lm", se = FALSE) +
    scale_y_continuous(trans = "log2", labels = NULL, breaks = NULL) +
    # The first argument is the axis title: label the x axis with the gene.
    scale_x_continuous(i, trans = "log2", labels = NULL, breaks = NULL)

  pdf(name)
  # Print inside the loop (ggplot objects are not auto-printed here) and close
  # the device even if rendering fails, so it cannot capture later plots.
  tryCatch(print(p), finally = dev.off())
}
答案 1 :(得分:1)
如果您需要显式的for循环:
# For each of Gene1..Gene4, draw a scatter plot against Gene5 and Gene6 (one
# facet per target gene) with a linear fit on the current graphics device.
# Wrap the print() call in pdf(paste0(gene, ".pdf")) / dev.off() to write one
# file per gene instead.
#
# NOTE: both axes are log2-transformed, so zero expression values become -Inf
# and are dropped by ggplot2 with a warning.
for (gene in paste0("Gene", 1:4)) {
  a <- A %>%
    pivot_longer(
      cols = Gene5:Gene6,
      names_to = "variable",
      values_to = "values"
    ) %>%
    # `.data[[gene]]` selects the column named by the string in `gene`; unlike
    # get(gene) it does not depend on environment lookup inside aes().
    ggplot(aes(.data[[gene]], values)) +
    geom_point() +
    facet_grid(. ~ variable, scales = "free_x") +
    geom_smooth(method = "lm", se = FALSE) +
    scale_y_continuous(trans = "log2", labels = NULL, breaks = NULL) +
    # Name the axis after the gene; it would otherwise show the raw aes()
    # expression.
    scale_x_continuous(
      name = gene,
      trans = "log2",
      labels = NULL,
      breaks = NULL
    )
  # Explicit print() is required: ggplot objects are not auto-printed in loops.
  print(a)
}