我有两个df,一个是df1
# df1: first sample data frame (looks like dput() output).
# 9 rows, a single factor column V1 with 9 levels; each value is one or more
# identifiers joined by ";".
# Includes the level "C1P641", which does not appear among df2's levels.
df1<- structure(list(V1 = structure(c(1L, 2L, 3L, 7L, 5L, 6L, 4L, 9L,
8L), .Label = c("A0A061ACH4;Q95Q10;Q9U1W6", "A0A061ACL3;Q965I6;O76618",
"A0A061ACR1;Q2XN02;F5GUA3;Q22498", "A0A061AJJ3;A0A061AEA8", "A0A061AL01",
"C1P641", "H2FLH3;H2FLH2;A0A061ACT3;A0A061AE24;Q23551-2;Q23551;Q23551-4;Q23551-3;Q23551-5",
"Q22501;A0A061AE05", "Q86CZ7"), class = "factor")), .Names = "V1", class = "data.frame", row.names = c(NA,
-9L))
另一个是df2
# df2: second sample data frame (looks like dput() output).
# 9 rows, a single factor column V1 with 9 levels; each value is one or more
# identifiers joined by ";".
# Includes the level "Q27GQ4", which does not appear among df1's levels.
df2 <- structure(list(V1 = structure(c(1L, 2L, 3L, 6L, 5L, 4L, 8L, 9L,
7L), .Label = c("A0A061ACH4;Q95Q10;Q9U1W6", "A0A061ACL3;Q965I6;O76618",
"A0A061ACR1;Q2XN02;F5GUA3;Q22498", "A0A061AJJ3;A0A061AEA8", "A0A061AL01",
"H2FLH3;H2FLH2;A0A061ACT3;A0A061AE24;Q23551-2;Q23551;Q23551-4;Q23551-3;Q23551-5",
"Q22501;A0A061AE05", "Q27GQ4", "Q86CZ7"), class = "factor")), .Names = "V1", class = "data.frame", row.names = c(NA,
-9L))
我想逐行比较这两个数据框:找出 df1 中哪些行也出现在 df2 中,反之亦然。
然后用 df1 和 df2 中所有不重复的行创建一个输出(即把这两个数据框的所有行合并到一个新的数据框中)。
对于存在于 df2 但不存在于 df1 的行,在 df1 对应的列中标记为 0;对 df2 也做同样的处理。
预期输出可以如下
# output: the expected result, as a data frame of 11 rows and three factor
# columns (V1, V2, V3).
# Row 1 acts as a header row: V1 is "", V2 is "df1" and V3 is "df2".
# The remaining rows list the V1 values; V2 / V3 are "" except where the value
# is flagged "0": V2 is "0" for "Q27GQ4" and V3 is "0" for "C1P641".
output<- structure(list(V1 = structure(c(1L, 2L, 3L, 4L, 8L, 6L, 7L, 5L,
10L, 11L, 9L), .Label = c("", "A0A061ACH4;Q95Q10;Q9U1W6", "A0A061ACL3;Q965I6;O76618",
"A0A061ACR1;Q2XN02;F5GUA3;Q22498", "A0A061AJJ3;A0A061AEA8", "A0A061AL01",
"C1P641", "H2FLH3;H2FLH2;A0A061ACT3;A0A061AE24;Q23551-2;Q23551;Q23551-4;Q23551-3;Q23551-5",
"Q22501;A0A061AE05", "Q27GQ4", "Q86CZ7"), class = "factor"),
V2 = structure(c(3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L), .Label = c("", "0", "df1"), class = "factor"), V3 = structure(c(3L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c("", "0",
"df2"), class = "factor")), .Names = c("V1", "V2", "V3"), class = "data.frame", row.names = c(NA,
-11L))
例如,Q27GQ4
在 df1 中不存在但存在于 df2 中,因此在输出的 df1 列中标记为 0。
而 C1P641
存在于 df1 中但不存在于 df2 中,因此在输出的 df2 列中标记为 0。
我很感激任何帮助,因为我是R的新手,我无法弄清楚如何做到这一点
答案 0 :(得分:0)
这是一种方法:
strServername
答案 1 :(得分:0)
试试这个:
# Full outer join of the two data frames on their shared column(s): every V1
# value that occurs in either df1 or df2 ends up in `op`.
op <- merge(x = df1, y = df2, all = TRUE)
# Membership flags: 1 when the V1 value is present in the respective source
# data frame, 0 otherwise.
op[["df1"]] <- as.numeric(op[["V1"]] %in% df1[["V1"]])
op[["df2"]] <- as.numeric(op[["V1"]] %in% df2[["V1"]])
> op
V1 df1 df2
1 A0A061ACH4;Q95Q10;Q9U1W6 1 1
2 A0A061ACL3;Q965I6;O76618 1 1
3 A0A061ACR1;Q2XN02;F5GUA3;Q22498 1 1
4 A0A061AJJ3;A0A061AEA8 1 1
5 A0A061AL01 1 1
6 C1P641 1 0
7 H2FLH3;H2FLH2;A0A061ACT3;A0A061AE24;Q23551-2;Q23551;Q23551-4;Q23551-3;Q23551-5 1 1
8 Q22501;A0A061AE05 1 1
9 Q86CZ7 1 1
10 Q27GQ4 0 1
OR
library(dplyr)
# Same full outer join as the base R version, with both 1/0 membership flags
# added in a single mutate() step.
# NOTE(review): the new columns reuse the names of the global data frames
# df1/df2; each expression refers to the data frame before a column of the
# same name has been created -- confirm this still holds if `op` is re-piped.
op <- merge(x = df1, y = df2, all = TRUE) %>%
  mutate(
    df1 = as.numeric(V1 %in% df1$V1),
    df2 = as.numeric(V1 %in% df2$V1)
  )
以下是您额外问题的答案:
- df1 和 df2 中有多少行是相同的?
# Number of df1 rows whose V1 value also occurs in df2.
sum(!is.na(match(df1$V1, df2$V1)))
- 有多少行存在于 df1 中但不存在于 df2 中?
# Number of df1 rows whose V1 value has no match in df2.
sum(is.na(match(df1$V1, df2$V1)))
- 有多少行存在于 df2 中但不存在于 df1 中?
# Number of df2 rows whose V1 value has no match in df1.
sum(is.na(match(df2$V1, df1$V1)))