以下是我正在使用的数据框的示例。
> head(tbl[,c('logFC', 'CI_L', 'CI_R', "adj_P_Value","gene",'Group1','Group2', 'Study_ID')])
logFC CI_L CI_R adj_P_Value gene Group1 Group2 Study_ID
1 -0.09017596 -0.43955752 0.25920561 1 CD244 Male Female GSE2461
2 0.08704844 -0.26134341 0.43544028 1 CD244 ulcerative colitis irritable bowel syndrome GSE2461
3 -0.03501474 -0.12677636 0.05674688 1 CD244 nonlesional skin lesional skin GSE27887
4 0.01096914 -0.08064105 0.10257932 1 CD244 pretreatment posttreatment GSE27887
5 -0.03707265 -0.12407201 0.04992672 1 CD244 Infliximab Before treatment GSE42296
6 0.07644834 -0.02849309 0.18138977 1 CD244 Responder Nonresponder GSE42296
> dput(droplevels(head(tbl, 4)))
structure(list(Probe_gene = c("211828_s_at", "213107_at", "213109_at",
"211828_s_at"), logFC = c(0.299038590078202, 0.110797898105632,
0.183214738942169, -0.733505457149486), CI_L = c(-0.0332844208935414,
-0.246475718463096, -0.103358698007331, -1.06488707237429), CI_R = c(0.631361601049945,
0.46807151467436, 0.469788175891669, -0.402123841924678), AveExpr = c(7.38827278419383,
7.83576862202959, 6.68411901305011, 7.38827278419383), t = c(2.08930195860002,
0.720053829585981, 1.48442706763586, -5.13936340603241), P_Value = c(0.0714526369900392,
0.492771856681782, 0.177447421180599, 0.000998740960213292),
adj_P_Value = c(1, 1, 1, 1), B = c(-4.07430683864883, -5.56181503167371,
-4.83144498851773, -0.294306065125513), gene = c("TNIK",
"TNIK", "TNIK", "TNIK"), Study_ID = c("GSE2461", "GSE2461",
"GSE2461", "GSE2461"), Group1 = c("Male", "Male", "Male",
"ulcerative colitis"), Group2 = c("Female", "Female", "Female",
"irritable bowel syndrome"), Study_ID = c("GSE2461", "GSE2461",
"GSE2461", "GSE2461"), Disease = c("irritable bowel syndrome; ulcerative colitis",
"irritable bowel syndrome; ulcerative colitis", "irritable bowel syndrome; ulcerative colitis",
"irritable bowel syndrome; ulcerative colitis"), DOID = c(9778L,
9778L, 9778L, 9778L), Title = c("Control (IBS) & Ulcerative colitis (UC) subjects",
"Control (IBS) & Ulcerative colitis (UC) subjects", "Control (IBS) & Ulcerative colitis (UC) subjects",
"Control (IBS) & Ulcerative colitis (UC) subjects"), GEO_Platform_ID = c("GPL96",
"GPL96", "GPL96", "GPL96"), Platform = c("Affymetrix Human U133A Array",
"Affymetrix Human U133A Array", "Affymetrix Human U133A Array",
"Affymetrix Human U133A Array"), PMID = c(0L, 0L, 0L, 0L),
Organism = c("Homo sapiens", "Homo sapiens", "Homo sapiens",
"Homo sapiens"), Data_Type = c("RNA", "RNA", "RNA", "RNA"
), Biomaterial = c("Colonic Mucosal biopsy", "Colonic Mucosal biopsy",
"Colonic Mucosal biopsy", "Colonic Mucosal biopsy"), Study_Type = c("in vivo",
"in vivo", "in vivo", "in vivo"), Samples = c(8L, 8L, 8L,
8L), Time_Point = c("Baseline", "Baseline", "Baseline", "Baseline"
), Treatment = c("NA", "NA", "NA", "NA"), Treatment_Protocol = c("NA",
"NA", "NA", "NA"), Raw_Data = c(0L, 0L, 0L, 0L), Notes = c("controls are IBS, not healty",
"controls are IBS, not healty", "controls are IBS, not healty",
"controls are IBS, not healty"), ylab = c("Female → Male",
"Female → Male", "Female → Male", "irritable bowel syndrome → ulcerative colitis"
)), .Names = c("Probe_gene", "logFC", "CI_L", "CI_R", "AveExpr",
"t", "P_Value", "adj_P_Value", "B", "gene", "Study_ID", "Group1",
"Group2", "Study_ID", "Disease", "DOID", "Title", "GEO_Platform_ID",
"Platform", "PMID", "Organism", "Data_Type", "Biomaterial", "Study_Type",
"Samples", "Time_Point", "Treatment", "Treatment_Protocol", "Raw_Data",
"Notes", "ylab"), row.names = c(NA, 4L), class = "data.frame")
我用这个来构建一个包含GSE#(Study_ID
)的图,然后是y轴上的对比度(Group1
vs Group2
),{{ 1}}作为x轴。为了清晰起见,我想在每个不同的GSE#之间绘制一条水平线,但我的代码似乎不起作用。
logFC
由于某些原因我现在使用的代码,datasetList = tbl$Study_ID
hLines =(which(duplicated(datasetList) == FALSE) - 0.5)
tbl$ylab <- paste(tbl$Group2," \U2192 ", tbl$Group1, sep = "")
p <- ggplot(data = tbl, aes(x = logFC, y = Probe_gene, group = Study_ID)) +
geom_point() +
geom_vline(xintercept = log(0.5,2), size = 0.2) +
geom_vline(xintercept = log(2/3,2), size = 0.2) +
geom_vline(xintercept = log(1.5,2), size = 0.2) +
geom_vline(xintercept = log(2,2), size = 0.2) +
geom_hline(yintercept = hLines) +
labs(title = tbl$gene, y = "Contrasts", x = bquote(~Log[2]~'(Fold Change)')) +
geom_errorbarh(aes(x = logFC, xmin = CI_L, xmax = CI_R), height = .1) +
geom_point(aes(colour = cut(adj_P_Value, c(-Inf, 0.01, 0.05, Inf)))) +
scale_color_manual(name = "P Value",
values = c("(-Inf,0.01]" = "red",
"(0.01,0.05)" = "orange",
"(0.05, Inf]" = "black"),
labels = c("<= 0.01", "0.01 < P Value <= 0.05", "> 0.05")) +
#theme_bw()+
theme(axis.text.y = element_blank(), strip.text.y = element_text(angle = 180),
panel.spacing.y = unit(0,'lines'), axis.ticks.y = element_blank()) +
facet_grid(Study_ID+ylab~ ., scales = 'free', space = 'free', switch = 'both')
p
打印的水平线多于我需要的水平线。它在每个 GSE#之间打印一条线,当我只需要它在唯一 GSE#之间打印一条线。我做错了什么? ggplot
包含行应该去的y截距。
P.S。作为一个侧面问题,如果有人知道我指定出现的形状的方式(类似于我指定颜色的方式),那将非常感激。在参考颜色时,我需要红色圆圈,橙色方块和黑色十字架,以适应hLines
功能中出现的相同条件。