我正在尝试为不同的细胞簇(cluster)绘制火山图。我有两个条件:未治疗与已治疗。我有一个由 cellranger 生成的差异表达 Excel 文件,但该文件中包含多个簇,每个簇都有各自的倍数变化和 p 值。如何创建一个包含所有簇(而不是单个簇)的火山图?我是否需要为每个簇分别绘制火山图,然后再以某种方式将它们组合起来?
我使用此代码仅生成了一个群集的图...
# Read the differential-expression table (comma-separated, with a header row).
macrophage_list <- read.table(
  "differential_expression_macrophage.csv",
  header = TRUE,
  sep = ","
)

# Volcano plot for a single cluster (cluster 1 columns only).
# Note: pCutoff = 10e-5 is numerically 1e-4.
EnhancedVolcano(
  macrophage_list,
  lab = as.character(macrophage_list$FeatureName),
  x = 'Cluster1.Log2.Fold.Change',
  y = 'Cluster1.Adjusted.P.Value',
  xlim = c(-8, 8),
  title = 'Macrophage',
  pCutoff = 10e-5,
  FCcutoff = 1.5,
  pointSize = 3.0,
  labSize = 3.0
)
如何合并excel文件中的所有信息以创建火山图?
我目前是逐个读入每个簇的数据,然后使用 rbind 将它们合并,但是有没有更简单、更快捷的方法来做到这一点?
以下是 `dput(gene_list[1:20, 1:14])` 的输出:
structure(list(Feature.ID = structure(1:20, .Label = c("a", "b",
"c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
"p", "q", "r", "s", "t"), class = "factor"), Feature.Name = structure(1:20, .Label = c("A",
"B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O", "P", "Q", "R", "S", "T"), class = "factor"), Cluster.1_Mean.Counts = c(0.000960904,
0.000320301, 0.001281205, 0.000320301, 0.000320301, 0.016335362,
0.000960904, 0, 0.001601506, 0.000320301, 0.007046627, 0.026585,
0.017296265, 0.004804518, 0, 0.874742598, 0.017616566, 0.007366928,
0.008327831, 0.001921807), Cluster.1_Log2.fold.change = c(0.291978774,
1.954943787, -2.008530337, -2.482461526, 3.539906287, 0.407455991,
-0.214981215, 1.539906287, 0.802940693, 2.539906287, -1.333136538,
-1.879953595, -0.52422405, -0.877946228, 1.539906287, -0.629373147,
1.118442519, 0.170672478, 1.065975099, 1.099333696), Cluster.1_Adjusted.p.value = c(1,
0.910243711, 0.04672812, 0.080866038, 0.610296549, 0.80063597,
1, 1, 0.951841603, 0.797013021, 0.103401275, 0.000594428, 0.907754993,
0.532689631, 1, 0.480958806, 0.078345008, 1, 0.198557945, 0.668312142
), Cluster.2_Mean.Counts = c(0.000902278, 0.001804555, 0.006315943,
0.004511388, 0, 0.029775159, 0.001804555, 0, 0.002706833, 0,
0.023459216, 0.128123411, 0.030677437, 0.009022775, 0, 2.174488883,
0.018947828, 0.019850106, 0.010827331, 0.000902278), Cluster.2_Log2.fold.change = c(0.792589781,
4.769869705, 0.35201719, 0.839132367, 3.184907204, 1.32985554,
0.962514783, 3.184907204, 1.725475586, 2.599944703, 0.560416339,
0.580736324, 0.407299626, 0.184907204, 3.184907204, 0.816580902,
1.120776867, 1.742684876, 1.409613491, 0.599944703), Cluster.2_Adjusted.p.value = c(1,
0.153573448, 1, 0.737977734, 1, 0.14478935, 0.853816767, 1, 0.47952604,
1, 0.65316285, 0.507251471, 0.776636022, 1, 1, 0.346630571, 0.285006452,
0.060868933, 0.21546202, 1), Cluster.3_Mean.Counts = c(0.001813813,
0, 0.019045032, 0.00725525, 0, 0.022672657, 0.000906906, 0, 0,
0, 0.029927908, 0.043531502, 0.046252221, 0.029021001, 0, 3.146057931,
0.020858845, 0.013603594, 0.008162157, 0), Cluster.3_Log2.fold.change = c(1.455721575,
2.192687169, 2.008262598, 1.504631175, 3.192687169, 0.9044422,
0.334706174, 3.192687169, -0.451169021, 2.607724668, 0.931421856,
-1.032594057, 1.038258504, 1.970294748, 3.192687169, 1.412371018,
1.26985503, 1.14829305, 0.991053308, -0.451169021), Cluster.3_Adjusted.p.value = c(0.757752635,
1, 0.032609935, 0.33316083, 1, 0.441825712, 1, 1, 1, 1, 0.380305075,
0.605158722, 0.339946318, 0.016952505, 1, 0.056529024, 0.259458704,
0.339639234, 0.536765022, 1), Cluster.4_Mean.Counts = c(0.000641899,
0, 0.002567596, 0.004493293, 0, 0.010270384, 0.003209495, 0,
0.000641899, 0, 0.028243557, 0.160474756, 0.012196081, 0.005135192,
0, 1.199709274, 0.005135192, 0.004493293, 0.005777091, 0.001283798
), Cluster.4_Log2.fold.change = c(0.269229783, 1.661547206, -0.886889419,
0.778904157, 2.661547206, -0.289908942, 1.602653517, 2.661547206,
0.076584705, 2.076584705, 0.854192284, 0.961549693, -0.967809414,
-0.644261223, 2.661547206, -0.104384578, -0.790579612, -0.467735811,
0.459913345, 0.722947751), Cluster.4_Adjusted.p.value = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.584036686, 1, 1, 1, 1, 1, 1,
1, 1)), class = "data.frame", row.names = c(NA, 20L))
答案 0 :(得分:0)
根据您的数据集,需要先将数据重塑为长格式。但在此之前,为了能用统一的模式拆分列名,我们先重命名部分列,使簇编号与指标名称之间以下划线分隔:
# Put an underscore between the cluster id and the metric name so that every
# column reads "<cluster>_<metric>" (e.g. "Cluster.1_Mean.Counts").
# fixed = TRUE makes the leading "." a literal dot; as a regular expression it
# would match ANY character and could swallow a letter or digit in front of
# the metric name. Names that already contain "_" are left unchanged.
colnames(df) <- gsub(".Mean", "_Mean", colnames(df), fixed = TRUE)
colnames(df) <- gsub(".Log2", "_Log2", colnames(df), fixed = TRUE)
colnames(df) <- gsub(".Adjus", "_Adjus", colnames(df), fixed = TRUE)
现在,我们可以使用 tidyr 包中的 pivot_longer 函数,按照正确的列名模式来重塑数据:
library(tidyr)
# Reshape to long format: one row per feature and cluster.
# Each column name is split at its last underscore: the part before it is
# stored in `set`, the part after it becomes the name of a value column
# (that is what the special ".value" entry requests).
final_df <- pivot_longer(
  df,
  cols = -c(Feature.ID, Feature.Name),
  names_to = c("set", ".value"),
  names_pattern = "(.+)_(.+)"
)
# A tibble: 80 x 6
Feature.ID Feature.Name set Mean.Counts Log2.fold.change Adjusted.p.value
<fct> <fct> <chr> <dbl> <dbl> <dbl>
1 a A Cluster.1 0.000961 0.292 1
2 a A Cluster.2 0.000902 0.793 1
3 a A Cluster.3 0.00181 1.46 0.758
4 a A Cluster.4 0.000642 0.269 1
5 b B Cluster.1 0.000320 1.95 0.910
6 b B Cluster.2 0.00180 4.77 0.154
7 b B Cluster.3 0 2.19 1
8 b B Cluster.4 0 1.66 1
9 c C Cluster.1 0.00128 -2.01 0.0467
10 c C Cluster.2 0.00632 0.352 1
# … with 70 more rows
现在,我们可以使用 ggplot2 和 ggrepel 库创建火山图,并用 Feature.Name 为点加上标签(如果尚未安装 ggrepel,则需要先安装):
library(ggplot2)
library(ggrepel)
# Volcano plot over all clusters at once: log2 fold change on the x axis,
# -log10(adjusted p-value) on the y axis, one point per feature/cluster row.
ggplot(
  data = final_df,
  mapping = aes(
    x = Log2.fold.change,
    y = -log10(Adjusted.p.value),
    label = Feature.Name
  )
) +
  geom_point() +
  # Only rows with an adjusted p-value below 0.05 receive a text label.
  geom_text_repel(
    data = final_df[which(final_df$Adjusted.p.value < 0.05), ],
    mapping = aes(label = Feature.Name)
  )
这样您将得到一张合并了所有簇的火山图:所有点的颜色相同,并且只有调整后 p 值 < 0.05 的点会标注其 Feature.Name。