我有以下数据框。我想按其中一个变量把数据框重塑(reshape)为宽格式,并且每个单元格只保留一个值。保留的值不应该只是第一个出现的值,而应该按照我手动指定的优先顺序来选取。
例如:对于 Variant_Classification.recode,我希望按以下从高到低的优先顺序保留:SNVs、Indel、Splice_Site、Translation_Start_Site,最后才是 Noncoding。
我折腾了一段时间,但一直没有找到解决办法!
谢谢!
# Example data: 20 variant calls, one row per call, with the patient
# (Individual), the variant class and the gene it falls in. All three columns
# are factors; row names are the original (non-contiguous) row indices.
mydata.df.test_3_mutations_col <- structure(
  list(
    Individual = factor(
      c(
        1L, 1L, 1L,
        3L, 3L, 3L, 3L, 3L, 3L,
        5L, 5L, 5L,
        6L, 6L, 6L, 6L,
        8L, 8L, 8L, 8L
      ),
      levels = 1:9,
      labels = c("p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9")
    ),
    Variant_Classification.recode = factor(
      c(
        3L, 1L, 2L, 4L, 5L, 3L, 4L, 2L, 1L, 3L,
        2L, 2L, 3L, 3L, 5L, 1L, 3L, 5L, 1L, 3L
      ),
      levels = 1:5,
      labels = c(
        "Noncoding", "Indel", "SNVs", "Splice_Site", "Translation_Start_Site"
      )
    ),
    Canonical_Hugo_Symbol = factor(
      c(
        1L, 1L, 1L, 1L, 1L, 1L, 1L,
        2L, 2L, 2L, 2L, 2L, 2L,
        1L, 1L, 1L, 1L, 1L, 1L, 1L
      ),
      levels = 1:2,
      labels = c("Gene1", "Gene2")
    )
  ),
  row.names = c(
    50L, 51L, 116L, 166L, 167L, 168L, 169L, 292L, 293L, 342L,
    343L, 344L, 357L, 358L, 359L, 462L, 522L, 523L, 524L, 631L
  ),
  class = "data.frame"
)
# Reshape to wide format: one row per Individual, one column per gene.
# NOTE: when several rows share the same Individual/gene pair, reshape() keeps
# only the first one it meets (and warns), so the surviving value depends on
# the current row order of the input.
mydata.reshape <- reshape(
  mydata.df.test_3_mutations_col,
  direction = "wide",
  idvar = "Individual",
  timevar = "Canonical_Hugo_Symbol"
)

# Strip the "Variant_Classification.recode." prefix that reshape() prepends,
# so the wide columns are named after the gene only. The pattern is anchored
# and the dots are escaped so that nothing but that literal prefix is removed.
colnames(mydata.reshape) <- sub(
  "^Variant_Classification\\.recode\\.", "", colnames(mydata.reshape)
)

# Melt back to long format for plotting. reshape2::melt() is called explicitly
# with its real argument name (`id.vars`); the old `variable_name` argument
# belongs to the legacy reshape package and is ignored by reshape2. The gene
# column therefore keeps the default name `variable`, which the plot below uses.
df <- reshape2::melt(mydata.reshape, id.vars = "Individual")

# Drop Individual/gene combinations that have no variant.
df <- na.omit(df)

# One tile per patient (x) and gene (y), filled by variant classification.
q <- ggplot(df, aes(Individual, variable, fill = value)) +
  geom_raster()

# Printing the sum renders the plot with the final theme, labels and palette.
q +
  theme(
    axis.text.x = element_text(angle = -90, hjust = 1, size = 4),
    text = element_text(size = 8),
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "black", size = 0.03)
  ) +
  labs(x = "Patients ID", y = "Gene Symbol") +
  scale_fill_brewer(palette = "Set1", name = "Variant Classification")
现在我找到了解决方案!
首先,我需要按想要的优先级重新设定因子水平的顺序,并按该顺序对数据框排序:
# Turn the variant class into an ordered factor whose level order is the
# desired priority (highest priority first).
mydata.df.test_3_mutations_col[["Variant_Classification.recode"]] <- ordered(
  mydata.df.test_3_mutations_col[["Variant_Classification.recode"]],
  levels = c("SNVs", "Indel", "Splice_Site", "Translation_Start_Site", "Noncoding")
)

# Sort the rows by that priority so the highest-priority class comes first;
# order() is stable, so rows within the same class keep their relative order.
mydata.df.test_3_mutations_col <- mydata.df.test_3_mutations_col[
  order(mydata.df.test_3_mutations_col[["Variant_Classification.recode"]]),
]
然后我就可以重塑数据,供 ggplot 绘图使用!
# Reshape to wide format: one row per Individual, one column per gene.
# reshape() keeps the first row it meets for each Individual/gene pair (and
# warns about the others), so the value that survives is decided by the row
# order of the input data frame.
mydata.reshape <- reshape(
  mydata.df.test_3_mutations_col,
  direction = "wide",
  idvar = "Individual",
  timevar = "Canonical_Hugo_Symbol"
)

# Strip the "Variant_Classification.recode." prefix that reshape() prepends,
# so the wide columns are named after the gene only. The pattern is anchored
# and the dots are escaped so that nothing but that literal prefix is removed.
colnames(mydata.reshape) <- sub(
  "^Variant_Classification\\.recode\\.", "", colnames(mydata.reshape)
)

# Melt back to long format (columns: Individual, variable, value).
# reshape2::melt() is called explicitly with its real argument name
# (`id.vars`); the old `variable_name` argument belongs to the legacy reshape
# package and is ignored by reshape2, so the gene column is named `variable`.
df <- reshape2::melt(mydata.reshape, id.vars = "Individual")

# Drop Individual/gene combinations that have no variant.
df <- na.omit(df)
答案 0 :(得分:0)
您可以尝试 reshape2 包中的 dcast 函数。它允许指定自定义的聚合函数,从而按照您的偏好顺序选取要保留的值,如下所示:
# Variant classes listed from most to least preferred.
pref.order <- c(
  "SNVs",
  "Indel",
  "Splice_Site",
  "Translation_Start_Site",
  "Noncoding"
)

# Pick the single most preferred value out of `x`.
#
# x: a vector (character or factor) of variant classes; may be empty.
# Returns a length-one character vector: the element of `x` that appears
# earliest in `pref.order`. Values missing from `pref.order` rank last.
# An empty `x` yields NA_character_.
my.fun <- function(x) {
  # Guard for empty input, which would otherwise give a zero-length result.
  if (length(x) == 0L) {
    return(NA_character_)
  }
  priority_rank <- match(x, pref.order)
  best_first <- order(priority_rank)
  as.character(x[best_first[1L]])
}
# reshape2 provides dcast().
library(reshape2)

# Cast to wide format: one row per Individual, one column per gene. Whenever
# several variant classes fall into the same cell, my.fun decides which single
# value is kept. The result is not assigned, so it is printed.
dcast(
  data = mydata.df.test_3_mutations_col,
  formula = Individual ~ Canonical_Hugo_Symbol,
  fun.aggregate = my.fun,
  value.var = "Variant_Classification.recode"
)
输出结果:
Individual Gene1 Gene2
1 p1 SNVs <NA>
2 p3 SNVs Indel
3 p5 <NA> SNVs
4 p6 SNVs SNVs
5 p8 SNVs <NA>