我正在尝试生成一个叠加了geom_point()散点的geom_violin()小提琴图,其中散点的颜色取决于该数据点被我归入哪个子集。
当我尝试在geom_point()函数中传入子集数据框时,出现了错误"Error in eval(expr, envir, enclos) : object 'ind' not found",但无论是自己排查还是用谷歌搜索这个错误,我都没能弄清楚自己哪里做错了。(如果去掉那一行,代码可以运行并生成如下输出;除了点没有按颜色区分之外,这正是我想要的:将第二个geom_point注释掉时的PDF输出)
这是我用来尝试完成这项工作的虚构示例数据集(gene1、gene2、gene3等是行名)。我将在下面的代码中对它进行转置:
,cell_1,cell_2,cell_3,cell_4,cell_5,cell_6,cell_7,cell_8,cell_9,cell_10,cell_11,cell_12,cell_13,cell_14,cell_15,cell_16,cell_17,cell_18,cell_19,cell_20,cell_21,cell_22,cell_23,cell_24,cell_25,cell_26,cell_27,cell_28,cell_29,cell_30,cell_31,cell_32,cell_33,cell_34,cell_35,cell_36,cell_37,cell_38,cell_39,cell_40,cell_41,cell_42,cell_43,cell_44,cell_45,cell_46,cell_47,cell_48,cell_49,cell_50
gene1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.19230,0.0,0.0,0.0,0.19230,0.0,0.0,0.0,69.3915,0.0,0.0,74.123,0,0,0,0,0,13.01,0.0,0.0,0.0,0.0,0.0,0.9231,73.023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
gene2,0.279204,23.456,13.1,10.5,0.0,14.2,151,2,50.3201,0.0,0.0,128.0,0.0,0.0,0.0,9.74082,20.9432,0.0,0.0,300.023,20.0234,0.0,0.0,300.024,123,201.345,164.681,301.421,173.023,216.537,201.234,302.102,199.234,20.234,40.234,180.0234,0.0,23.234,190.134,170.023,0.0,8.023,40.234,180.0234,0.0,23.234,190.134,170.023,21.24,8.023
gene3,25.9954,77.3398,45.3092,107.508,0.266139,70.4924,114.17,291.324,198.525,190.353,185.381,0.14223,90.323,20.4332,29.012,500.391,2.51459,300.021,60.001,192.023,60.0234,300.022,60.002,192.024,34,500.392,2.51460,300.022,60.002,192.024,60.0235,300.023,60.003,192.025,60.002,192.024,34,500.392,2.51460,300.022,60.002,192.024,60.0235,300.023,60.003,192.025,35,194.231,94.13,32.124
gene4,46.1717,194.241,0.776565,3.0325,0.762981,2.3123,14.507,13.0234,0.538315,0.0,1.5234,11.2341,0.0,1.34819,6.0142,3.2341,4.4444,150.324,0.0,20.9432,134.023,150.325,0.0,20.9433,3.2341,4.4444,150.324,0.0,20.9432,134.023,170.13408,0.0,3.2341,4.4444,150.324,0.0,3.2341,6.7023,150.324,0.0,3.2341,4.4444,170.341,0.0,20.9432,134.023,150.325,0.0,50.234,3.123
gene5,94.2341,301.234,0.0,0.0,123.371,0.0,0.0,155.234,0.0,0.664744,0.0,402.616,222.148,0.0,0.0,0.0,169.234,0.0,10.234,0.0,0.0,0.0,0.99234,0.0,0.99234,0.0,0.0,0.0,0.99234,0.0,0.99234,0.0,0.0,0.0,0.99234,0.0,10.324,0.0,0.0,15.0234,43.1243,0.0,320.023,0.0,0.0,0.0,1.234,0.0,12.123,0.0
这是我写的代码:
#Load dataset
df_raw <- read.table("pretend_dataset.csv", sep=",", header=TRUE)
# Use the gene names as row names.
# NOTE(review): assumes the CSV's first column is headed "Name"; the sample shown
# above has an empty first header field -- confirm against the real file.
rownames(df_raw) <- df_raw$Name
# Drop the "Name" column now that its values live in the row names
df_raw$Name <- NULL
# Transpose the dataset so that cells become rows and genes become columns
matrix_transp <- t(df_raw)
# Convert the transposed matrix back into a data frame
df <- as.data.frame(as.matrix(matrix_transp))
# Keep only the rows (cells) whose gene1 value is greater than zero
df.positive <- subset(df, gene1 > 0)
# Log-transform both data frames; the +1 offset maps zero values to log(1) = 0
df.log <- log(df+1)
df.positive.log <- log(df.positive+1)
# Violin plot for each gene using all cells, with a jittered scatter on top;
# the second geom_point layer is intended to mark the gene1-positive cells in red.
plot <- ggplot(stack(df.log), aes(x = ind, y = values, fill=ind)) +
geom_violin() +
geom_point(position = position_jitterdodge(jitter.width=4)) +
# NOTE(review): unlike the main data, df.positive.log is passed here without
# stack(), so it has gene columns rather than `ind`/`values` -- this looks like
# the source of the "object 'ind' not found" error; confirm.
geom_point(data=df.positive.log, aes(x = ind, y = values, fill=ind), position = position_jitterdodge(jitter.width=4), color="red") +
xlab("Gene") + ylab("Expression level (TPM log)") +
theme_classic(base_size = 14, base_family = "Helvetica") +
theme(axis.text.y=element_text(size=14)) +
theme(axis.title.y=element_text(size=14, face="bold")) +
theme(axis.text.x=element_text(size=14)) +
theme(axis.title.x=element_text(size=14, face="bold")) +
scale_fill_brewer(palette="Pastel1")
# Display the plot with the y axis restricted to the range 0-8
plot + coord_cartesian(ylim = c(0, 8))
答案 0 :(得分:1)
下面的答案在按基因着色的小提琴图上叠加了一组抖动点,这些点按其数值是否为正着色。
library(dplyr)
library(ggplot2)
library(tidyr)

# Read in the data. `df` is expected to be a character string (or character
# vector of lines) holding the CSV text shown in the question; the first
# column supplies the row names (gene names).
# Using `text =` lets read.csv() create and close the text connection itself,
# instead of leaving an open textConnection behind.
df2 <- read.csv(text = df, header = TRUE, row.names = 1)

# Add the row names as a `Gene` column and reshape to long format:
# one row per (gene, cell) pair, with the measurement in `value`.
#   positive - TRUE when the measurement is greater than zero
#   absolute - absolute value of the measurement
#   logabs   - log(absolute + 1), so that zero maps to zero
df3 <- df2 %>%
  mutate(Gene = rownames(.)) %>%
  pivot_longer(cols = -Gene, names_to = "cell", values_to = "value") %>%
  mutate(
    positive = value > 0,
    absolute = abs(value),
    logabs = log(absolute + 1)
  )

# Violin per gene (filled by gene) with jittered points coloured by `positive`.
df3 %>%
  ggplot(aes(x = Gene, y = logabs, fill = Gene)) +
  geom_violin() +
  geom_jitter(aes(colour = positive))
这是你想要的结果吗?
编辑:读取数据的那一行代码,是把您在上面展示的数据行粘贴成一个文本字符串(即代码中的df),再将该字符串转换为数据框。如果您已经有了数据框,就不需要这一步;加上它只是因为没有可用的dput()对象。
编辑2:这个扩展的答案源自对前一个答案的评论。该解决方案使用问题中所示数据的转置矩阵。得到的图中有按基因分组的小提琴图,并叠加了散点,散点按该细胞在gene1中的观测值是否为正着色。
确切的数据集如下所示,是在矩阵上调用dput()命令的结果。
# Example dataset as produced by dput(): a 50 x 4 numeric matrix whose rows are
# cell_1 ... cell_50 and whose columns are gene1 ... gene4 (values are stored
# column by column, i.e. all 50 gene1 values first, then gene2, and so on).
df <- structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.1923, 0, 0, 0, 0.1923, 0, 0, 0, 69.3915, 0, 0, 74.123, 0, 0,
0, 0, 0, 13.01, 0, 0, 0, 0, 0, 0.9231, 73.023, 0, 0, 0, 0, 0,
0, 0, 0, 0.279204, 23.456, 13.1, 10.5, 0, 14.2, 151, 2, 50.3201,
0, 0, 128, 0, 0, 0, 9.74082, 20.9432, 0, 0, 300.023, 20.0234,
0, 0, 300.024, 123, 201.345, 164.681, 301.421, 173.023, 216.537,
201.234, 302.102, 199.234, 20.234, 40.234, 180.0234, 0, 23.234,
190.134, 170.023, 0, 8.023, 40.234, 180.0234, 0, 23.234, 190.134,
170.023, 21.24, 8.023, 25.9954, 77.3398, 45.3092, 107.508, 0.266139,
70.4924, 114.17, 291.324, 198.525, 190.353, 185.381, 0.14223,
90.323, 20.4332, 29.012, 500.391, 2.51459, 300.021, 60.001, 192.023,
60.0234, 300.022, 60.002, 192.024, 34, 500.392, 2.5146, 300.022,
60.002, 192.024, 60.0235, 300.023, 60.003, 192.025, 60.002, 192.024,
34, 500.392, 2.5146, 300.022, 60.002, 192.024, 60.0235, 300.023,
60.003, 192.025, 35, 194.231, 94.13, 32.124, 46.1717, 194.241,
0.776565, 3.0325, 0.762981, 2.3123, 14.507, 13.0234, 0.538315,
0, 1.5234, 11.2341, 0, 1.34819, 6.0142, 3.2341, 4.4444, 150.324,
0, 20.9432, 134.023, 150.325, 0, 20.9433, 3.2341, 4.4444, 150.324,
0, 20.9432, 134.023, 170.13408, 0, 3.2341, 4.4444, 150.324, 0,
3.2341, 6.7023, 150.324, 0, 3.2341, 4.4444, 170.341, 0, 20.9432,
134.023, 150.325, 0, 50.234, 3.123), .Dim = c(50L, 4L), .Dimnames = list(
c("cell_1", "cell_2", "cell_3", "cell_4", "cell_5", "cell_6",
"cell_7", "cell_8", "cell_9", "cell_10", "cell_11", "cell_12",
"cell_13", "cell_14", "cell_15", "cell_16", "cell_17", "cell_18",
"cell_19", "cell_20", "cell_21", "cell_22", "cell_23", "cell_24",
"cell_25", "cell_26", "cell_27", "cell_28", "cell_29", "cell_30",
"cell_31", "cell_32", "cell_33", "cell_34", "cell_35", "cell_36",
"cell_37", "cell_38", "cell_39", "cell_40", "cell_41", "cell_42",
"cell_43", "cell_44", "cell_45", "cell_46", "cell_47", "cell_48",
"cell_49", "cell_50"), c("gene1", "gene2", "gene3", "gene4"
)))
将上述数据集转换为所需绘图所需的代码如下所示。
# Reshape the cell-by-gene matrix `df` into long format, one row per
# (cell, gene) pair.
#   Cell     - the cell name, taken from the matrix row names
#   positive - TRUE when that cell's gene1 value is greater than zero; computed
#              before reshaping so every row of a cell carries the same status
#   absolute - absolute value of the measurement
#   logabs   - log(absolute + 1), so that zero maps to zero
df2 <- df %>%
  as.data.frame() %>%
  mutate(Cell = rownames(.), positive = gene1 > 0) %>%
  pivot_longer(
    cols = -c(Cell, positive),
    names_to = "Gene",
    values_to = "value"
  ) %>%
  mutate(absolute = abs(value), logabs = log(absolute + 1))

# Violin per gene (filled by gene) with jittered points coloured by the
# cell's gene1 status.
df2 %>%
  ggplot(aes(x = Gene, y = logabs, fill = Gene)) +
  geom_violin() +
  geom_jitter(aes(colour = positive))
由于该图可能难以解读,下面给出另外几种展示相对于gene1状态的方式。
# Alternative views of the long-format data in `df2`, split by gene1 status.

# Box plots per gene, filled by whether the cell is gene1-positive
ggplot(df2, aes(x = Gene, y = logabs, fill = positive)) +
  geom_boxplot()

# The same comparison drawn as violins
ggplot(df2, aes(x = Gene, y = logabs, fill = positive)) +
  geom_violin()