我正在尝试使用ggplot2创建条带图。以下是tbl的子集(仅包含我用到的相关列),随后附上它的dput输出。
> tbl[,c('Study_ID', 'Probe_ID', 'Group1','Group2','LogFC', 'adj_P_Value', 'P_Value', 'CI_L','CI_R','Disease')]
Study_ID Probe_ID Group1 Group2 LogFC adj_P_Value P_Value CI_L CI_R
1 GSE2461 220307_at Male Female -0.09017596 1.000000e+00 5.662047e-01 -0.43955752 0.25920561
2 GSE2461 220307_at ulcerative colitis irritable bowel syndrome 0.08704844 1.000000e+00 5.784053e-01 -0.26134341 0.43544028
3 GSE27887 220307_at nonlesional skin lesional skin -0.03501474 1.000000e+00 4.409881e-01 -0.12677636 0.05674688
4 GSE27887 220307_at pretreatment posttreatment 0.01096914 1.000000e+00 8.080366e-01 -0.08064105 0.10257932
5 GSE42296 7921677 Infliximab Before treatment -0.03707265 1.000000e+00 3.979403e-01 -0.12407201 0.04992672
6 GSE42296 7921677 Responder Nonresponder 0.07644834 1.000000e+00 1.505444e-01 -0.02849309 0.18138977
7 GSE42296 7921677 Rheumatoid Arthritis Crohn's Disease 0.42318863 3.960125e-06 1.989713e-10 0.31076269 0.53561457
8 GSE58558 220307_at M F -0.11881801 1.000000e+00 1.130180e-01 -0.26629675 0.02866072
9 GSE58558 220307_at non lesional skin lesional skin -0.18914128 1.000000e+00 3.696739e-03 -0.31525660 -0.06302596
10 GSE58558 220307_at responder nonresponder -0.14470319 1.000000e+00 2.328062e-01 -0.38396386 0.09455748
11 GSE58558 220307_at week 12 day 1 -0.39619004 4.311942e-01 2.215798e-05 -0.57226227 -0.22011781
12 GSE58558 220307_at week 2 day 1 -0.28765455 1.000000e+00 8.753977e-04 -0.45375957 -0.12154953
13 GSE59294 220307_at C Dupilumab 300 mg B Dupilumab 150 mg 0.16853309 1.000000e+00 1.140155e-01 -0.04273877 0.37980494
14 GSE59294 220307_at D Placebo B Dupilumab 150 mg -0.18995566 1.000000e+00 2.264691e-01 -0.50367856 0.12376724
15 GSE59294 220307_at NL skin LS skin 0.01376129 1.000000e+00 9.041383e-01 -0.21711706 0.24463964
16 GSE59294 220307_at Pre Post 0.02234607 1.000000e+00 8.069367e-01 -0.16235054 0.20704268
Disease
1 irritable bowel syndrome; ulcerative colitis
2 irritable bowel syndrome; ulcerative colitis
3 atopic Dermatitis
4 atopic Dermatitis
5 Crohn's Disease; Rheumatoid Arthritis
6 Crohn's Disease; Rheumatoid Arthritis
7 Crohn's Disease; Rheumatoid Arthritis
8 Atopic Dermatitis
9 Atopic Dermatitis
10 Atopic Dermatitis
11 Atopic Dermatitis
12 Atopic Dermatitis
13 atopic Dermatitis
14 atopic Dermatitis
15 atopic Dermatitis
16 atopic Dermatitis
以下是dput
:
> dput(droplevels(tbl[,c('Study_ID', 'Probe_ID', 'Group1','Group2','LogFC', 'adj_P_Value', 'P_Value', 'CI_L','CI_R','Disease')]))
structure(list(Study_ID = c("GSE2461", "GSE2461", "GSE27887",
"GSE27887", "GSE42296", "GSE42296", "GSE42296", "GSE58558", "GSE58558",
"GSE58558", "GSE58558", "GSE58558", "GSE59294", "GSE59294", "GSE59294",
"GSE59294"), Probe_ID = c("220307_at", "220307_at", "220307_at",
"220307_at", "7921677", "7921677", "7921677", "220307_at", "220307_at",
"220307_at", "220307_at", "220307_at", "220307_at", "220307_at",
"220307_at", "220307_at"), Group1 = c("Male", "ulcerative colitis",
"nonlesional skin", "pretreatment", "Infliximab", "Responder",
"Rheumatoid Arthritis", "M", "non lesional skin", "responder",
"week 12", "week 2", "C Dupilumab 300 mg", "D Placebo", "NL skin",
"Pre"), Group2 = c("Female", "irritable bowel syndrome", "lesional skin",
"posttreatment", "Before treatment", "Nonresponder", "Crohn's Disease",
"F", "lesional skin", "nonresponder", "day 1", "day 1", "B Dupilumab 150 mg",
"B Dupilumab 150 mg", "LS skin", "Post"), LogFC = c(-0.0901759558643281,
0.0870484364429408, -0.0350147376937934, 0.0109691380052655,
-0.0370726462749328, 0.0764483363743359, 0.423188628619509, -0.118818013184408,
-0.189141277685995, -0.144703191279992, -0.396190039768736, -0.28765454670704,
0.168533085440721, -0.189955660434197, 0.0137612879743023, 0.0223460675171673
), adj_P_Value = c(1, 1, 1, 1, 1, 1, 3.96012504622782e-06, 1,
1, 1, 0.431194244819507, 1, 1, 1, 1, 1), P_Value = c(0.566204678925109,
0.578405275354266, 0.440988072013756, 0.808036622723435, 0.397940346528484,
0.150544373610059, 1.98971262936634e-10, 0.11301796668591, 0.00369673863311212,
0.232806229179741, 2.21579776371792e-05, 0.000875397680320129,
0.114015475901252, 0.226469133014055, 0.904138332714553, 0.806936684043586
), CI_L = c(-0.439557521861354, -0.261343410788222, -0.12677635951562,
-0.0806410486876688, -0.124072011981945, -0.0284930943795223,
0.310762687356251, -0.26629674914578, -0.315256597358499, -0.383963864121397,
-0.57226227039893, -0.453759565458485, -0.0427387734415052, -0.503678563834605,
-0.217117064412363, -0.162350541147386), CI_R = c(0.259205610132698,
0.435440283674103, 0.0567468841280329, 0.1025793246982, 0.0499267194320791,
0.181389767128194, 0.535614569882768, 0.0286607227769647, -0.0630259580134921,
0.0945574815614131, -0.220117809138542, -0.121549527955595, 0.379804944322947,
0.12376724296621, 0.244639640360967, 0.207042676181721), Disease = c("irritable bowel syndrome; ulcerative colitis",
"irritable bowel syndrome; ulcerative colitis", "atopic Dermatitis",
"atopic Dermatitis", "Crohn's Disease; Rheumatoid Arthritis",
"Crohn's Disease; Rheumatoid Arthritis", "Crohn's Disease; Rheumatoid Arthritis",
"Atopic Dermatitis", "Atopic Dermatitis", "Atopic Dermatitis",
"Atopic Dermatitis", "Atopic Dermatitis", "atopic Dermatitis",
"atopic Dermatitis", "atopic Dermatitis", "atopic Dermatitis"
)), .Names = c("Study_ID", "Probe_ID", "Group1", "Group2", "LogFC",
"adj_P_Value", "P_Value", "CI_L", "CI_R", "Disease"), row.names = c(NA,
-16L), class = "data.frame")
最后,这是我到目前为止的代码。
# Test plot using ggplot2: one facet row per Study_ID/contrast, with a point at
# LogFC and a horizontal error bar from CI_L to CI_R.

# Largest and smallest LogFC; used below to widen the x limits if needed.
# The as.character() round trip presumably guards against LogFC being a
# factor -- TODO confirm (it is numeric in the dput shown).
maxFC = max(as.numeric(as.character(tbl$LogFC)))
minFC = min(as.numeric(as.character(tbl$LogFC)))
# Row index (minus 0.5) of the first row of each Study_ID.
# NOTE(review): hLines is not referenced anywhere in the visible code.
datasetList = tbl$Study_ID
hLines =(which(duplicated(datasetList) == FALSE) - 0.5)
# Two-line facet label: Group2, an arrow, then Group1 on the next line.
tbl$ylab <- paste(tbl$Group2," \U2192 ","\n", tbl$Group1, sep = "")
# NOTE(review): only x, y and group are mapped here; no shape aesthetic.
p <- ggplot(data = tbl, aes(x = LogFC, y = Probe_ID, group = Study_ID)) +
# Reference lines at fold changes of 0.5, 2/3, 1.5 and 2 on the log2 scale.
geom_vline(xintercept = log(0.5,2), size = 0.2) +
geom_vline(xintercept = log(2/3,2), size = 0.2) +
geom_vline(xintercept = log(1.5,2), size = 0.2) +
geom_vline(xintercept = log(2,2), size = 0.2) +
# NOTE(review): tbl$gene is not among the columns shown for tbl -- confirm it
# exists and holds a single title value.
labs(title = tbl$gene, y = "Contrasts", x = bquote(~Log[2]~'(Fold Change)')) +
geom_errorbarh(aes(x = LogFC, xmin = CI_L, xmax = CI_R), height = .1) +
# Colour each point by its adj_P_Value bin.
# NOTE(review): shape is not mapped here either, so scale_shape_manual() below
# has nothing to act on and every point keeps the default shape.
geom_point(aes(colour = cut(adj_P_Value, c(-Inf, 0.01, 0.05, Inf)))) +
scale_color_manual(name = "P Value",
values = c("(-Inf,0.01]" = "red",
# NOTE(review): cut() produces right-closed interval labels by default, so the
# middle level is presumably "(0.01,0.05]"; this key ends in ")" -- verify.
"(0.01,0.05)" = "orange",
"(0.05, Inf]" = "black"),
labels = c("<= 0.01", "0.01 < P Value <= 0.05", "> 0.05")) +
scale_shape_manual(values = c( 4,15,19)) +
# Show at least [-2, 2] on the x axis, extending if LogFC falls outside it.
coord_cartesian(xlim = c(min(-2,minFC),max(2,maxFC))) +
# Hide the y axis text, ticks and axis lines; rotate the facet strip text.
theme(axis.text.y = element_blank(), strip.text.y = element_text(angle = 180),
#panel.grid.major = element_blank(),
#panel.grid.minor = element_blank(),
axis.line.y = element_blank(),
axis.line.x = element_blank(),
#panel.background = element_rect(fill = 'white', colour = 'white'),
#panel.grid = element_blank(),
panel.spacing.y = unit(0.5,'lines'),
axis.ticks.y = element_blank()) +
# One facet row per Study_ID + ylab combination, with free scales and free
# panel sizes; strip placement is changed via switch = 'both'.
facet_grid(Study_ID+ylab~ ., scales = 'free', space = 'free', switch = 'both')
p
基本上,点的实际位置由LogFC值确定,但adj_P_Value <= 0.01的点应显示为红色圆圈,在0.01到0.05之间的点显示为橙色方块,>= 0.05的点显示为黑色叉号(也就是说,我提供的数据不应该显示任何方块)。我的尝试是在geom_point中使用cut,但这似乎不起作用:颜色显示正确,但形状不正确。这一直困扰着我。如果我违反了任何惯例或标准(很可能有),请告诉我,并建议能实现同样效果的更好做法。谢谢!
更新:
geom_point
答案 0 :(得分:1)
添加您想要的因子列
library(dplyr)
# Bin adj_P_Value into a numeric code stored in `colourgroup`:
#   1 for values <= 0.01, 2 for values strictly between 0.01 and 0.05,
#   3 for values >= 0.05. Rows matching no condition (e.g. NA) stay NA.
tbl <- mutate(
  tbl,
  colourgroup = case_when(
    adj_P_Value <= 0.01 ~ 1,
    adj_P_Value >= 0.05 ~ 3,
    adj_P_Value > 0.01 ~ 2
  )
)
然后改变
aes(x = LogFC, y = Probe_ID, group = Study_ID)
到
aes(x = LogFC, y = Probe_ID, colour = factor(colourgroup), shape = factor(colourgroup))
并且
# Name every value by its colourgroup code. Unnamed values are handed out in
# order to whichever levels are present, so an empty bin (e.g. no rows in
# group 2) would shift "orange" and its shape onto group 3.
# Shapes follow the request: 19 = solid circle, 15 = solid square, 4 = cross.
scale_color_manual(values = c("1" = "red", "2" = "orange", "3" = "black")) +
scale_shape_manual(values = c("1" = 19, "2" = 15, "3" = 4))
下面这个最简的ggplot命令在我这里可以正常运行。注意我有意交换了x和y的取值;另外,red和orange两种颜色较难区分。
# Minimal plot: colour and shape both follow the colourgroup code.
# Uses `tbl`, the data frame that received the colourgroup column, instead of
# the undefined `df2`.
ggplot(
  tbl,
  aes(
    x = Probe_ID,
    y = LogFC,
    colour = factor(colourgroup),
    shape = factor(colourgroup)
  )
) +
  geom_point() +
  # Named values keep each colour/shape tied to its own code even when a bin
  # is empty; unnamed values would be assigned by position to the levels
  # present. Shapes: 19 = solid circle, 15 = solid square, 4 = cross.
  scale_color_manual(values = c("1" = "red", "2" = "orange", "3" = "black")) +
  scale_shape_manual(values = c("1" = 19, "2" = 15, "3" = 4))