我的数据框 mydf 包含两列:LeftGenes 和 RightGenes。我还有另一个数据框 mydf2。
我想把 mydf 各列中的条目(如果一个单元格中有多个条目,它们以 ':' 分隔)与 mydf2 的 accession 列进行匹配,并将其替换为对应的 gene_id,得到下面的结果。
# Example input: a 4 x 2 character matrix. Each cell holds either the literal
# string "NA" or one or more accessions joined by ':'.
mydf <- cbind(
  LeftGenes = c("NP_570602", "NA", "NA", "XP_006719119:NR_040112"),
  RightGenes = c("NA", "NA", "NM_000662:NM_001160170:NM_001160171", "NA")
)
# Lookup table: one row per accession, giving the gene_id it belongs to.
# Both columns are character vectors.
mydf2 <- data.frame(
  gene_id = c("1", "1", "2", "2", "2", "2", "3", "9", "9", "9"),
  accession = c(
    "NM_130786", "NP_570602", "NM_000014", "NP_000005", "XM_006719056",
    "XP_006719119", "NR_040112", "NM_000662", "NM_001160170", "NM_001160171"
  ),
  stringsAsFactors = FALSE
)
结果
LeftGenes RightGenes
1 1 NA
2 NA NA
3 NA 9:9:9
4 2:3 NA
答案 0 :(得分:1)
试试这个
# Translate a vector of ':'-separated accession cells into ':'-separated
# gene ids.
#
# cells:  vector (character or factor) whose elements contain one or more
#         accessions joined by ':'.
# lookup: data frame with columns `accession` and `gene_id`.
#
# Returns a character vector the same length as `cells`. An accession that
# is not present in `lookup$accession` (including the literal "NA")
# contributes the text "NA" to its cell, because paste() renders a missing
# gene_id as "NA".
map_accessions <- function(cells, lookup) {
  # split every cell into its individual accessions
  pieces <- strsplit(as.character(cells), ":", fixed = TRUE)
  vapply(
    pieces,
    function(acc) {
      # match() returns the first matching row of the lookup table (or NA)
      paste(lookup$gene_id[match(acc, lookup$accession)], collapse = ":")
    },
    character(1)
  )
}

mydf <- data.frame(mydf)

# Build the result directly, one translated column per input column. Using
# vapply() instead of 1:length() indexing means zero-row input produces a
# zero-row result rather than a subscript error.
res <- data.frame(
  LeftGenes = map_accessions(mydf$LeftGenes, mydf2),
  RightGenes = map_accessions(mydf$RightGenes, mydf2),
  stringsAsFactors = FALSE
)