我有两个这样的数据框:
X A B C SUM
E 1 0 1 2
F 0 0 1 1
G 1 1 0 2
以及这个:
X A B C SUM
E 1 0 1 2
F 0 0 1 1
G 1 1 0 2
H 0 0 1 1
I 0 0 0 0
我想获得的结果是:
X A B C
H 0 0 1
所以,我想要一段代码来创建一个新的数据框,其中只包含没有同时出现在两个数据框中的行(即只存在于其中一个数据框中的行)。此外,这些行的总和(SUM)必须大于零。
有人可以帮助我吗? 谢谢!
答案 0 :(得分:0)
这是您问题的一个解决方案。
设两个数据集分别为 mydata1 和 mydata2。
# library() fails loudly if dplyr is missing; require() would only return
# FALSE and let the script die later with a less helpful error.
library(dplyr)

# Keep only rows with a positive total. All-zero rows can never be part of
# the result, so they are dropped once up front instead of inside every call.
nonzero1 <- mydata1 %>% filter(SUM > 0)
nonzero2 <- mydata2 %>% filter(SUM > 0)

# Rows are compared on every column of mydata1, so mydata2 is expected to
# have the same column names.
key_columns <- colnames(mydata1)

# Symmetric difference: rows of either table that have no exact match in the
# other one. SUM is removed at the end because the requested output only
# contains X, A, B and C.
bind_rows(
  anti_join(nonzero1, nonzero2, by = key_columns),
  anti_join(nonzero2, nonzero1, by = key_columns)
) %>%
  select(-SUM)
根据评论的建议,您还可以只使用两个数据集共有的列,以确保参与比较的列相同。
# library() fails loudly if dplyr is missing; require() would only return
# FALSE and let the script die later with a less helpful error.
library(dplyr)

# Only columns present in both tables can be compared.
common_columns <- intersect(colnames(mydata1), colnames(mydata2))

# Keep rows with a positive total and restrict both tables to the shared
# columns. Without the select(), the two anti_join() results would still
# carry their table-specific columns and could not be stacked with rbind().
nonzero1 <- mydata1 %>%
  filter(SUM > 0) %>%
  select(all_of(common_columns))
nonzero2 <- mydata2 %>%
  filter(SUM > 0) %>%
  select(all_of(common_columns))

# Symmetric difference: rows of either table that have no match (on the
# shared columns) in the other one. SUM is removed at the end because the
# requested output only contains X, A, B and C.
bind_rows(
  anti_join(nonzero1, nonzero2, by = common_columns),
  anti_join(nonzero2, nonzero1, by = common_columns)
) %>%
  select(-SUM)
答案 1 :(得分:0)
# library() fails loudly if data.table is missing; require() would only
# return FALSE and let the script die later with a less helpful error.
library(data.table)

# Example inputs from the question: indicator columns A, B, C, their row
# total SUM, and the row label X.
dat1 <- data.table(
  X = c("E", "F", "G"),
  A = c(1, 0, 1),
  B = c(0, 0, 1),
  C = c(1, 1, 0),
  SUM = c(2, 1, 2)
)
dat2 <- data.table(
  X = c("E", "F", "G", "H", "I"),
  A = c(1, 0, 1, 0, 0),
  B = c(0, 0, 1, 0, 0),
  C = c(1, 1, 0, 1, 0),
  SUM = c(2, 1, 2, 1, 0)
)

# Stack both tables without SUM (the total is recomputed below and is not
# part of the requested output). unique() removes repeats inside a single
# table first, so that a duplicate in the stacked table can only mean
# "present in both inputs".
dat3 <- rbind(
  unique(dat1[, setdiff(names(dat1), "SUM"), with = FALSE]),
  unique(dat2[, setdiff(names(dat2), "SUM"), with = FALSE])
)

# A row occurring in both inputs is flagged by at least one of the two scan
# directions; rows flagged by neither are exclusive to one input.
in_both <- duplicated(dat3) | duplicated(dat3, fromLast = TRUE)

# Row total over every column except the label column X.
row_total <- rowSums(dat3[, setdiff(names(dat3), "X"), with = FALSE])

result <- dat3[!in_both & row_total > 0]
result
答案 2 :(得分:0)
library(data.table)

# Example inputs from the question: indicator columns A, B, C, their row
# total SUM, and the row label X.
dat1 <- data.table(
  X = c("E", "F", "G"),
  A = c(1, 0, 1),
  B = c(0, 0, 1),
  C = c(1, 1, 0),
  SUM = c(2, 1, 2)
)
dat2 <- data.table(
  X = c("E", "F", "G", "H", "I"),
  A = c(1, 0, 1, 0, 0),
  B = c(0, 0, 1, 0, 0),
  C = c(1, 1, 0, 1, 0),
  SUM = c(2, 1, 2, 1, 0)
)

# Rows whose label X appears in only one of the two tables. Only X is
# compared here, not the remaining columns.
D1 <- dat1[!(X %in% dat2[["X"]])]
D2 <- dat2[!(X %in% dat1[["X"]])]

# Stack the table-specific rows, keep those with a positive total, then
# remove the SUM column from the result.
DF <- rbindlist(list(D1, D2), use.names = TRUE)
DF <- DF[SUM > 0]
DF[, SUM := NULL]