a<-data.frame(cbind("Sample"=c("100","101","102","103"),"Status"=c("Y","","","partial")))
b<-data.frame(cbind("Sample"=c("100","101","102","103","106"),"Status"=c("NA","Y","","","Y")))
desired<-data.frame(cbind("Sample"=c("100","101","102","103","106"),"Status"=c("Y","Y","","partial","Y")))
我有多个来源的样本处理数据,我想将它们组合成一个主列表。如何合并2个数据框之间的“状态”列,以便取代b以便为每个样本整理“Y”和“部分”?提前谢谢。
答案 0 :(得分:1)
我假设你想要保持a和b中的值具有优先级顺序,Y覆盖部分涵盖不包含任何内容的NA。
d <- merge(a,b,by="Sample",all=TRUE)
d$Status <- ""
d$Status[apply(c,1,function(x){any(is.na(x))})] <- "" # cleaning the NAs I introduced with the merge
d$Status[apply(c,1,`%in%`, x = "NA")] <- NA # or "NA" if you want to keep it this way, or "" if you want to get rid of them
d$Status[apply(c,1,`%in%`, x = "partial")] <- "partial"
d$Status[apply(c,1,`%in%`, x = "Y")] <- "Y"
d <- d[,c(1,4)]
# Sample Status
# 1 100 Y
# 2 101 Y
# 3 102
# 4 103 partial
# 5 106 Y
答案 1 :(得分:1)
require(data.table)
a<-data.table(cbind("Sample"=c("100","101","102","103"),"Status"=c("Y","","","partial")))
b<-data.table("Sample"=c("100","101","102","103","106"),"Status"=c("NA","Y","","","Y"))
c <- merge(a, b, by = "Sample", all=TRUE)
c[,Status := ifelse(!is.na(Status.x), Status.x, Status.y)]
c[,`:=` (Status.x=NULL, Status.y = NULL)]