我使用以下命令制作了下面的分面图(facet plot):
# Boxplots of logFC per Length, faceted by X (rows) and Modification (columns).
# Layers are chained with `+` at the end of each line; no commas between them.
ggplot(data, aes(factor(Length), logFC)) +
  geom_boxplot(fill = "grey90") +
  # Zoom the y axis to [-5, 5] without dropping the points outside it.
  coord_cartesian(ylim = c(-5, 5)) +
  facet_grid(X ~ Modification)
有没有办法计算每个箱线图的p值,并用geom_text将其显示在每个箱线图上方?我想做单样本t检验,将每组数据与y = 0进行比较。
我的数据如下:
X Length logFC Modification
Daub 26 -0.7307060811 NTA
Daub 22 -0.3325621272 NTA
Daub 22 -2.0579390395 NTA
Daub 25 2.7199391457 NTA
Daub 23 -0.0009869389 NTA
Daub 25 -0.3318842493 NTA
...
我的错误讯息:
> data <- structure(list(Experiment = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Daub", "Marie",
+ "Meister"), class = "factor"), Length = c(26L, 22L, 22L, 25L,
+ 23L, 25L, 23L, 25L, 24L, 23L, 24L, 26L, 24L, 21L, 20L, 21L, 22L,
+ 22L, 21L, 21L, 21L, 22L, 21L, 22L, 21L, 21L, 20L, 20L, 21L, 25L,
+ 20L, 22L, 24L, 22L, 23L, 24L, 23L, 23L, 22L, 22L, 22L, 22L, 21L,
+ 19L, 21L, 20L, 20L, 20L, 19L, 19L, 19L, 22L, 23L, 23L, 22L, 23L,
+ 22L, 20L, 21L, 24L, 24L, 24L, 25L, 24L, 21L, 20L, 23L, 23L, 20L,
+ 23L, 23L, 24L, 20L, 21L, 22L, 24L, 23L, 22L, 23L, 22L, 23L, 23L,
+ 19L, 21L, 23L, 24L, 22L, 23L, 23L, 21L, 22L, 20L, 22L, 23L, 25L,
+ 22L, 22L, 23L, 22L, 23L, 25L, 25L, 24L, 24L, 23L, 22L, 22L, 25L,
+ 23L, 24L, 23L, 23L, 22L, 22L, 25L, 23L, 22L, 25L, 21L, 19L, 21L,
+ 23L, 22L, 22L, 20L, 20L, 20L, 23L, 22L, 21L, 21L, 23L, 23L, 23L,
+ 21L, 25L, 23L, 24L, 24L, 23L, 23L, 23L, 21L, 22L, 21L, 21L, 23L,
+ 23L, 22L, 22L, 21L, 22L, 22L, 25L, 24L, 24L, 22L, 24L, 24L, 23L,
+ 22L, 21L, 22L, 23L, 20L, 22L, 23L, 24L, 25L, 24L, 25L, 22L, 23L,
+ 24L, 21L, 25L, 23L, 19L, 21L, 21L, 22L, 20L, 21L, 18L, 20L, 20L,
+ 21L, 20L, 23L, 19L, 19L, 22L, 22L, 22L, 22L, 22L, 21L, 22L, 24L,
+ 20L, 21L, 22L, 22L, 21L, 21L, 21L, 21L, 21L, 23L, 23L, 23L, 25L,
+ 25L, 25L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 25L, 25L), logFC = c(-0.7307060811,
+ -0.3325621272, -2.0579390395, 2.7199391457, -0.0009869389, -0.3318842493,
+ -2.1922199037, -1.8907961065, -1.9059255014, -0.2815081355, -0.2040330335,
+ 3.661469505, 0.6489955587, -0.0261245467, -1.4312409441, -1.1199604078,
+ -1.6528592355, -2.8208936451, -0.7207549269, -1.6528592355, -1.2540377475,
+ -2.1088724443, -2.1088724443, -1.5556550771, -1.5556550771, -0.2899601367,
+ 0.36449851, -1.7787723427, -1.5556550771, -1.5556550771, -1.5556550771,
+ -2.1092566794, 0.0417776477, -3.0768675589, -4.2573082637, -1.5556550771,
+ -1.8493703566, -0.7310899725, -2.8201262449, -0.7203706918, -2.1088724443,
+ -3.5714106365, -1.5556550771, -1.2144625017, 1.6608916211, -0.3147141406,
+ 1.2344697053, 1.2303596917, 1.2138067782, 0.9409846988, 0.5270928206,
+ -1.0435216994, -1.4320081419, -1.1644217165, -1.1478237529, -0.9941196613,
+ 0.0762668692, 1.0076747803, 0.0679302699, -0.4852244221, 0.7792467457,
+ 0.4902414285, 1.6172022872, 0.5270928206, -1.5403877099, -0.3322684844,
+ 0.0965099283, 0.8067662712, -0.3322684844, -1.2928579903, 0.6067208763,
+ 0.0247576412, -0.0291609233, -0.4737578429, 0.0743062433, 0.1126554177,
+ -0.0156954476, 1.1069888258, -0.956482117, -0.2829742145, 0.8511530937,
+ -0.1571780266, -1.2033199926, -1.1883052896, -0.0619556757, -0.7813018565,
+ 2.2467468049, 2.8382841074, 0.5658773933, -0.4461699001, -0.7409548873,
+ -0.992979577, -1.0966445642, -0.8035321174, 0.4586171366, -0.2760821893,
+ 0.0585422656, 0.0328935437, 0.3858231436, -0.4374188039, 1.1166538873,
+ -1.6539303789, 0.2027459981, -0.2193112677, -0.3939953745, -1.6726108643,
+ 1.1518720793, 2.2517568637, -0.561147283, -2.1625509666, -1.65562751,
+ -0.9048469063, -1.0759388341, 0.4938537603, 1.8754485108, -1.5944759871,
+ 1.0688499798, 2.6559945275, -1.908097968, -1.9214219995, -2.9675169126,
+ 0.0365892303, -0.8345258687, -1.0535567925, -2.0036191122, -1.6843791204,
+ -2.5554312825, -1.5778268888, -1.576142107, -0.9398408101, 2.4453250675,
+ -1.5434092122, -0.794414515, -0.6200158513, 0.5556353409, -1.0772272444,
+ -0.8720587283, -0.8082062813, -0.7353916189, 0.1072543637, 0.5658773933,
+ 0.13043531, -0.0154958912, -0.868710614, -0.1922496916, 1.0682890388,
+ -1.673413308, -0.9581901784, -1.9575141988, -1.8973257122, 1.4967046965,
+ -2.456068976, -1.4577030552, -4.2692094743, -1.9124787897, -1.4993411082,
+ -0.6409837734, 0.6369441273, -0.9960964825, -5.9703084924, -1.97960268,
+ -1.2422870608, -1.5170124157, -1.9021683731, 3.4029417731, 0.1812972171,
+ -1.6370149729, -1.749015407, -2.1677341592, -1.4942545905, -1.1137758818,
+ -1.2428452903, -1.3014446584, 0.0287537402, -0.8721416458, -2.4062762035,
+ -4.0278899462, -2.2229120764, -1.5950383235, -3.6098212725, -2.5979636046,
+ 0.3631424981, 1.1377073609, 0.5151459494, 0.0640542096, -0.7715375264,
+ -1.0361077101, -0.2462753448, -2.3058140776, -0.0847179004, -0.518970228,
+ 0.8519432911, 1.9516260022, -0.5706154628, 1.240812729, 0.336736001,
+ 2.2509464232, -0.322918086, -4.4019571741, -0.5618441487, 3.4700721641,
+ -3.9220135953, -2.1968879291, -0.1362995026, 2.164094913, -1.0688563363,
+ 0.4302583643, 2.6411096027, -3.020513717, -1.5395519303, -2.2219591633,
+ -3.8891956255, 0.9602784132, -0.6470571429, 1.853151793, -0.3271268741,
+ -0.9870872828, -2.516770073, -1.2898235194, -1.7246627604, -0.61328192,
+ -3.5457352204, -2.5068717697), Modification = structure(c(1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L,
+ 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
+ 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
+ 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
+ 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
+ 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
+ 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
+ 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
+ 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L,
+ 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
+ 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NTA",
+ "t3-d", "t3-u", "t5-d", "t5-u"), class = "factor")), .Names = c("Experiment",
+ "Length", "logFC", "Modification"), class = "data.frame", row.names = c(NA,
+ -223L))
> library(dplyr)
> pvalues <- data %>% group_by(Experiment, Modification, Length) %>%
+ filter(n() > 1) %>%
+ summarize(p.value = (t.test(logFC, mu = 0)$p.value))
Error in t.test(logFC, mu = 0) : object 'logFC' not found
答案 0 :(得分:3)
您可以通过将数据汇总到p值表中来完成此操作。这可以使用dplyr完成:
library(dplyr)

# One-sample t-test of logFC against mu = 0 for every box in the facet plot,
# i.e. for every Experiment x Modification x Length combination.
pvalues <- data %>%
  group_by(Experiment, Modification, Length) %>%
  # t.test() needs at least two observations, so drop singleton groups.
  filter(n() > 1) %>%
  summarize(p.value = t.test(logFC, mu = 0)$p.value) %>%
  # summarize() only peels off the last grouping level; ungroup so that later
  # steps (e.g. a multiple-testing correction) operate on the whole table.
  ungroup()
(其中 filter(n() > 1) 这一行会删除所有大小为1的组,因为这样的组无法计算p值)。这会生成一个如下所示的表:
# Experiment Modification Length p.value
# 1 Daub NTA 22 0.3980043
# 2 Daub NTA 23 0.3535590
# 3 Daub NTA 24 0.5831962
# 4 Daub NTA 25 0.9137644
# 5 Daub NTA 26 0.6254004
# 6 Daub t3-d 20 0.1493108
现在,您可以使用geom_text图层把这些p值作为文字添加到图中,并为其选定一个y值,例如y = 3:
library(ggplot2)

# Faceted boxplots with the per-box p-value printed at y = 3.
ggplot(data, aes(factor(Length), logFC)) +
  geom_boxplot(fill = "grey90") +
  coord_cartesian(ylim = c(-5, 5)) +
  facet_grid(Experiment ~ Modification) +
  # The text layer reads from `pvalues`; x is inherited from the plot-level
  # aes(), so `pvalues` must carry Length and both faceting columns.
  # format.pval() keeps the labels short instead of printing raw doubles at
  # full precision, and renders values near 0 sensibly.
  geom_text(
    aes(y = 3, label = format.pval(p.value, digits = 2)),
    data = pvalues,
    size = 1
  )
您可能需要调整geom_text的文字大小(可能还有角度),才能让图清晰可读。另请注意,由于您进行了多次检验,应该查看校正后的p值而不是原始p值。您可以这样计算:
# p.adjust() must see every p-value at once: on a grouped table mutate() would
# apply the Bonferroni correction within each group only, so drop any grouping
# first.
pvalues <- pvalues %>%
  ungroup() %>%
  mutate(p.adjusted = p.adjust(p.value, method = "bonferroni"))
函数format.pval也会派上用场,尤其是当某些p值接近0时。