geom_point()的不同颜色基于dataframe的子集

时间:2016-09-12 12:41:05

标签: r ggplot2

我正在尝试使用geom_point()绘图生成一个geom_violin()图,其中geom_point()图根据我将数据分类到哪个子集中具有不同颜色的点。

我有一个错误说"错误在eval(expr,envir,enclos):object' ind'找不到"当我在geom_point()函数中尝试加载子集数据帧时,但是我不会通过探索或谷歌搜索错误来理解我做错了什么。 (如果没有该行,代码会运行并生成此输出,除了点的颜色编码之外,这是我想要的:PDF output when the second geom_point is commented out

这是我用来尝试完成这项工作的无意义数据集(gene1,2,3是rownames)。我将在下面的代码中转置它:

 ,cell_1,cell_2,cell_3,cell_4,cell_5,cell_6,cell_7,cell_8,cell_9,cell_10,cell_11,cell_12,cell_13,cell_14,cell_15,cell_16,cell_17,cell_18,cell_19,cell_20,cell_21,cell_22,cell_23,cell_24,cell_25,cell_26,cell_27,cell_28,cell_29,cell_30,cell_31,cell_32,cell_33,cell_34,cell_35,cell_36,cell_37,cell_38,cell_39,cell_40,cell_41,cell_42,cell_43,cell_44,cell_45,cell_46,cell_47,cell_48,cell_49,cell_50
gene1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.19230,0.0,0.0,0.0,0.19230,0.0,0.0,0.0,69.3915,0.0,0.0,74.123,0,0,0,0,0,13.01,0.0,0.0,0.0,0.0,0.0,0.9231,73.023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
gene2,0.279204,23.456,13.1,10.5,0.0,14.2,151,2,50.3201,0.0,0.0,128.0,0.0,0.0,0.0,9.74082,20.9432,0.0,0.0,300.023,20.0234,0.0,0.0,300.024,123,201.345,164.681,301.421,173.023,216.537,201.234,302.102,199.234,20.234,40.234,180.0234,0.0,23.234,190.134,170.023,0.0,8.023,40.234,180.0234,0.0,23.234,190.134,170.023,21.24,8.023
gene3,25.9954,77.3398,45.3092,107.508,0.266139,70.4924,114.17,291.324,198.525,190.353,185.381,0.14223,90.323,20.4332,29.012,500.391,2.51459,300.021,60.001,192.023,60.0234,300.022,60.002,192.024,34,500.392,2.51460,300.022,60.002,192.024,60.0235,300.023,60.003,192.025,60.002,192.024,34,500.392,2.51460,300.022,60.002,192.024,60.0235,300.023,60.003,192.025,35,194.231,94.13,32.124
gene4,46.1717,194.241,0.776565,3.0325,0.762981,2.3123,14.507,13.0234,0.538315,0.0,1.5234,11.2341,0.0,1.34819,6.0142,3.2341,4.4444,150.324,0.0,20.9432,134.023,150.325,0.0,20.9433,3.2341,4.4444,150.324,0.0,20.9432,134.023,170.13408,0.0,3.2341,4.4444,150.324,0.0,3.2341,6.7023,150.324,0.0,3.2341,4.4444,170.341,0.0,20.9432,134.023,150.325,0.0,50.234,3.123
gene5,94.2341,301.234,0.0,0.0,123.371,0.0,0.0,155.234,0.0,0.664744,0.0,402.616,222.148,0.0,0.0,0.0,169.234,0.0,10.234,0.0,0.0,0.0,0.99234,0.0,0.99234,0.0,0.0,0.0,0.99234,0.0,0.99234,0.0,0.0,0.0,0.99234,0.0,10.324,0.0,0.0,15.0234,43.1243,0.0,320.023,0.0,0.0,0.0,1.234,0.0,12.123,0.0

这是我写的代码:     #Load数据集     df_raw< - read.table(" pretend_dataset.csv",                          九月="&#34 ;,                          标题= TRUE)

#Make gene names into rownames
rownames(df_raw) <- df_raw$Name

#Remove "Name" column
df_raw$Name <- NULL

#TRANSPOSE DATASET
matrix_transp <- t(df_raw)

#Make matrix_transp matrix into dataframe
df <- as.data.frame(as.matrix(matrix_transp))

#Subset gene1 positive and negatve cells
df.positive <- subset(df, gene1 > 0)

#Convert data in data frames to log scale
df.log <- log(df+1)
df.positive.log <- log(df.positive+1)

#Violin plot for each gene with all cells (positive and negative with color coded scatter)

plot <- ggplot(stack(df.log), aes(x = ind, y = values, fill=ind)) +
  geom_violin() +
  geom_point(position = position_jitterdodge(jitter.width=4)) +
  geom_point(data=df.positive.log, aes(x = ind, y = values, fill=ind), position = position_jitterdodge(jitter.width=4), color="red") +
   xlab("Gene") + ylab("Expression level (TPM log)") +
   theme_classic(base_size = 14, base_family = "Helvetica") +
   theme(axis.text.y=element_text(size=14)) + 
   theme(axis.title.y=element_text(size=14, face="bold")) + 
   theme(axis.text.x=element_text(size=14)) +
   theme(axis.title.x=element_text(size=14, face="bold")) + 
   scale_fill_brewer(palette="Pastel1")

plot + coord_cartesian(ylim = c(0, 8))

1 个答案:

答案 0 :(得分:1)

下面的答案覆盖了一个彩色的小提琴情节,其中有一组由正面或负面着色的抖动点。

library(dplyr); library(ggplot2); library(tidyr)
#read in data. 
df2 <-read.csv(textConnection(df), header=TRUE, row.names = 1)

# Add in the rownames and  gather the dataset
df3 <- df2 %>% mutate(Gene= rownames(.)) %>% 
  gather(., key= "cell", value="value", -Gene) %>% 
  mutate(positive = value>0, absolute= abs(value), logabs= log(absolute+1))


df3 %>% ggplot(. , aes(x = Gene, y=logabs, fill=Gene)) +
  geom_violin() +geom_jitter( aes(colour= positive)) 

这是你在找什么?

编辑:读取数据行,将您在上面显示的数据中的行粘贴到文本字符串中,然后将文本字符串转换为数据框。如果您已经拥有数据框,则没有必要。它仅用于没有可用的dput()对象。

编辑2 : 这个扩展的答案来自对前一个答案的评论。该解决方案使用问题中显示的数据的转置矩阵。得到的图表有小提琴图,由基因覆盖,并用基因1中的观察结果是否为负点着色。

确切的数据集如下所示,是在矩阵上调用dput()命令的结果。

df <- structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0.1923, 0, 0, 0, 0.1923, 0, 0, 0, 69.3915, 0, 0, 74.123, 0, 0, 
0, 0, 0, 13.01, 0, 0, 0, 0, 0, 0.9231, 73.023, 0, 0, 0, 0, 0, 
0, 0, 0, 0.279204, 23.456, 13.1, 10.5, 0, 14.2, 151, 2, 50.3201, 
0, 0, 128, 0, 0, 0, 9.74082, 20.9432, 0, 0, 300.023, 20.0234, 
0, 0, 300.024, 123, 201.345, 164.681, 301.421, 173.023, 216.537, 
201.234, 302.102, 199.234, 20.234, 40.234, 180.0234, 0, 23.234, 
190.134, 170.023, 0, 8.023, 40.234, 180.0234, 0, 23.234, 190.134, 
170.023, 21.24, 8.023, 25.9954, 77.3398, 45.3092, 107.508, 0.266139, 
70.4924, 114.17, 291.324, 198.525, 190.353, 185.381, 0.14223, 
90.323, 20.4332, 29.012, 500.391, 2.51459, 300.021, 60.001, 192.023, 
60.0234, 300.022, 60.002, 192.024, 34, 500.392, 2.5146, 300.022, 
60.002, 192.024, 60.0235, 300.023, 60.003, 192.025, 60.002, 192.024, 
34, 500.392, 2.5146, 300.022, 60.002, 192.024, 60.0235, 300.023, 
60.003, 192.025, 35, 194.231, 94.13, 32.124, 46.1717, 194.241, 
0.776565, 3.0325, 0.762981, 2.3123, 14.507, 13.0234, 0.538315, 
0, 1.5234, 11.2341, 0, 1.34819, 6.0142, 3.2341, 4.4444, 150.324, 
0, 20.9432, 134.023, 150.325, 0, 20.9433, 3.2341, 4.4444, 150.324, 
0, 20.9432, 134.023, 170.13408, 0, 3.2341, 4.4444, 150.324, 0, 
3.2341, 6.7023, 150.324, 0, 3.2341, 4.4444, 170.341, 0, 20.9432, 
134.023, 150.325, 0, 50.234, 3.123), .Dim = c(50L, 4L), .Dimnames = list(
    c("cell_1", "cell_2", "cell_3", "cell_4", "cell_5", "cell_6", 
    "cell_7", "cell_8", "cell_9", "cell_10", "cell_11", "cell_12", 
    "cell_13", "cell_14", "cell_15", "cell_16", "cell_17", "cell_18", 
    "cell_19", "cell_20", "cell_21", "cell_22", "cell_23", "cell_24", 
    "cell_25", "cell_26", "cell_27", "cell_28", "cell_29", "cell_30", 
    "cell_31", "cell_32", "cell_33", "cell_34", "cell_35", "cell_36", 
    "cell_37", "cell_38", "cell_39", "cell_40", "cell_41", "cell_42", 
    "cell_43", "cell_44", "cell_45", "cell_46", "cell_47", "cell_48", 
    "cell_49", "cell_50"), c("gene1", "gene2", "gene3", "gene4"
    )))

将上述数据集转换为所需绘图所需的代码如下所示。

df2 <- df %>% as.data.frame %>% mutate(Cell= rownames(.), positive = gene1>0) %>% 
  gather(., key= "Gene", value="value", -Cell,-positive) %>% 
  mutate( absolute= abs(value), logabs= log(absolute+1))


df2 %>% ggplot(. , aes(x = Gene, y=logabs, fill=Gene)) +
  geom_violin() +geom_jitter( aes(colour= positive)) 

由于情节可能难以解释,因此需要显示相对于gene1的状态的其他方法。

df2 %>% ggplot(., aes(x=Gene, y=logabs, fill=positive)) +geom_boxplot()

df2 %>% ggplot(. , aes(x = Gene, y=logabs, fill=positive)) +
  geom_violin()