我试图同时显示小提琴图和箱形图。
我正在使用细胞计数来显示不同癌症样品/组中免疫细胞的数量。当我只绘制4个样本的表达量时,一切正常。但当我再添加一个样本(GTEx_M2)时,其他4个样本的小提琴图全都消失了,最后只剩下箱形图。有什么建议吗?预先感谢!
library(ggplot2)
library(ggpubr)
# Example input data: a 30-row table with one numeric column per sample group
# (Hot_M1, Cold_M1, Hotnorm_M1, Coldnorm_M1, GTEx_M1).
# GTEx_M1 is zero in every row except a single small non-zero value.
# The object carries the tibble classes ("tbl_df", "tbl") on top of
# "data.frame", so tibble-style subsetting applies when tibble's methods are
# available.
Cibersort7 = structure(list(
Hot_M1 = c(0.0214400757119873, 0.170557805230298, 0.0804456569076382,
0.0893978598771954, 0.134477669028274, 0, 0.0525708788146097,
0.0511711964723951, 0.126904881120795, 0.0485101553521798,
0.170894800822398, 0.106555021195299, 0.0970104286070479,
0.115825265978309, 0.0427923320117795, 0.0733825856784013,
0.0111265771852828, 0.0657019859547462, 0.11656416302191,
0.172002238486688, 0.0154591596631105, 0.0350445248592811,
0.0795539781894198, 0.0781276090630857, 0.0087982313041526,
0.0289274652853823, 0.0712661645666698, 0.0435482190581647,
0.0455556872660798, 0.0871522448556361),
Cold_M1 = c(0.0346024087291239, 0.0201947741817111, 0.0306194109725081,
0.0277445612030966, 0.00905915199266666, 0.00939058305405205,
0.0146535473252646, 0.0159980760737253, 0.147670469457772,
0.0426119074182886, 0.0219251208462312, 0.0128996237306264,
0.0094816829459359, 0.0219336027293415, 0.0438220246067735,
0.00950926112282649, 0.0838386603270565, 0.0486661009213444,
0.00651564872414969, 0.00110323590537234, 0.0807125087307139, 0,
0.037709808301658, 0, 0.0898041410439557, 0.0417739517920607, 0,
0.0202168551193018, 0.00176008746063679, 0.0161337603014608),
Hotnorm_M1 = c(0.00622155478760928, 0.00864956989565159, 0.0245812979257332,
0.0339687958970202, 8e-04, 0, 0.0582086801600888, 0,
0.03481918582501, 0.021338008027511, 0.0157360408231509,
0.00489068636912568, 0.0281166183638247, 0.0162726467268935,
0.0415769266772567, 0, 0.00344830695596762, 0.00196737745405557,
0.0075141479562764, 0.0232464687737552, 0, 0, 0.0289423690350636,
0.0218584208695064, 0.0255945495324721, 4e-04, 0.0221942067802419,
0.00476738514342175, 0.00722699142988291, 0.00974645683928458),
Coldnorm_M1 = c(0.0280536098964266, 0.0261826834038114, 0.0150413750071331, 0,
0.0199730743908202, 0.0115748800373456, 0.0275674859254823,
0.0168847795974374, 0.0140281070945953, 0.00907861159279308,
0, 0, 0, 0.0453414461512909, 0, 0.00730963773612433,
0.0236424416792874, 0.0866914356225127, 0.0246339344582405,
0.00881531992455549, 0.0140744199322424, 0, 0, 0,
0.0319211626770028, 0.00155291355277603, 0.00295913497381517,
0.00738775271575955, 0.0179786878323852, 0.00442919920031897),
GTEx_M1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.00551740159760184, 0, 0, 0, 0, 0)),
row.names = c(NA, -30L),
class = c("tbl_df", "tbl", "data.frame"))
这只是我数据的一小部分,仍然显示出与我看到的相同的问题。
# Reshape the wide Cibersort7 table (one column per sample group) into a long
# data frame `dataframe` with one row per observation and two columns:
# `Sample` (factor naming the group) and `Expression` (the numeric value).

# Sample columns to plot, in display order. Values and labels are both looked
# up through this vector, so a label can never drift from its data the way
# position-based `Cibersort7[, i]` labelling could if column order changed.
sample_names <- c("Hot_M1", "Cold_M1", "Hotnorm_M1", "Coldnorm_M1", "GTEx_M1")
n_samples <- length(sample_names)

# One vector per sample with missing values dropped.
y_axis <- lapply(sample_names, function(nm) na.omit(Cibersort7[[nm]]))

# Preallocate lists of the right length. `list(5)` builds a one-element list
# holding the number 5, not five empty slots.
groupname <- groupexpression <- data <- violinPlot <- pairwise_results <-
  vector("list", n_samples)

for (i in seq_len(n_samples)) {
  groupexpression[[i]] <- y_axis[[i]]
  # Repeat the label once per value so that a sample left with no values after
  # NA removal yields a zero-row piece instead of a data.frame() length error.
  groupname[[i]] <- factor(
    rep(sample_names[i], length(groupexpression[[i]])),
    levels = sample_names
  )
  data[[i]] <- data.frame(
    Sample = groupname[[i]],
    Expression = groupexpression[[i]]
  )
}

# Stack the per-sample pieces into one long data frame.
dataframe <- do.call(rbind, data)
# Keep the factor levels in the order given by `sample_names`.
dataframe$Sample <- factor(dataframe$Sample, levels = sample_names)
# Pairs of sample groups to compare against each other.
# NOTE(review): not used in the code shown here; presumably meant for a
# pairwise test layer (e.g. from ggpubr) -- confirm against the full script.
my_comparisons <- list(
  c("Hot_M1", "Cold_M1"),
  c("Hot_M1", "Hotnorm_M1"),
  c("Hot_M1", "GTEx_M1"),
  c("Cold_M1", "Coldnorm_M1"),
  c("Cold_M1", "GTEx_M1")
)
# Violin plot of Expression per Sample, with a narrow white boxplot drawn on
# top of each violin. `scale` is left at geom_violin()'s default here.
violinPlot <- ggplot(
  data = dataframe,
  mapping = aes(x = Sample, y = Expression, fill = Sample)
) +
  geom_violin(trim = FALSE) +  # keep the density tails untrimmed
  geom_boxplot(fill = "white", width = 0.1) +
  labs(
    title = "Distribution of M2 Macrophages",
    x = "Tissue Samples",
    y = "Cibersort Count"
  ) +
  theme_classic()

# Display the plot.
violinPlot
这是我的小提琴图:
在添加GTEx数据之前,它们是这样的:
这是单独显示时的GTEx小提琴图:
我了解我的GTEx数据为零,但是为什么小提琴图消失了?
答案 0 :(得分:0)
`geom_violin` 有一个名为 `scale` 的参数,其默认值为 `"area"`。以下内容摘自 `?geom_violin`:
如果为 "area"(默认),则所有小提琴的面积都相同(在修剪尾部之前)。如果为 "count",则面积按观测值的数量成比例缩放。如果为 "width",则所有小提琴的最大宽度均相同。
由于GTEx的Expression值集中在0,因此其密度在该值处急剧上升。在普通的密度图中可以更明显地看到这一点,其中各个样本的曲线叠加在一起:
# Overlaid density curves of Expression, one coloured line per Sample.
ggplot(
  data = dataframe,
  mapping = aes(x = Expression, colour = Sample)
) +
  geom_density() +
  theme_classic()
使用默认的 `scale = "area"` 参数时,数据中包含GTEx会使所有其他样本的小提琴图变得非常细窄,以至于几乎完全被箱形图遮住。如果把箱形图图层注释掉,仍然可以看到它们。
如果希望各个小提琴的可见程度相当,可以设置 `scale = "width"`。如果选择此选项,最好向目标受众特别说明这一点:因为 `scale = "area"` 更为常见,当某些小提琴的面积看上去明显大于其他小提琴时,读者可能会感到困惑。
# Same violin + boxplot figure, but with scale = "width" so that every violin
# is drawn with the same maximum width.
ggplot(
  data = dataframe,
  mapping = aes(x = Sample, y = Expression, fill = Sample)
) +
  geom_violin(scale = "width", trim = FALSE) +
  geom_boxplot(fill = "white", width = 0.1) +
  labs(
    title = "Distribution of M2 Macrophages",
    x = "Tissue Samples",
    y = "Cibersort Count"
  ) +
  theme_classic()
p.s. 您可以简化数据处理步骤;依我看,这实际上就是从宽格式到长格式的转换。通常的做法是使用 `melt`(来自reshape2软件包)或 `gather`(来自tidyr软件包)。下面是一种可能的实现:
library(dplyr)
library(tidyr)
# Wide-to-long reshape: stack every column of Cibersort7 into key/value pairs
# (column name -> Sample, cell value -> Expression), then convert Sample to a
# factor whose levels follow the original column order.
df2 <- mutate(
  gather(Cibersort7, key = Sample, value = Expression),
  Sample = factor(Sample, levels = colnames(Cibersort7))
)
> all.equal(dataframe, as.data.frame(df2))
[1] TRUE
p.p.s. 如果有多人在您的问题下发表评论,而您在回复中没有 @ 任何人,那么没有人会收到相关通知;您已经花了很多功夫来改进问题,这样就太可惜了。有关该通知机制如何工作的说明,请参见here。