这是一个例子:
library(data.table)

# Snapshot "201901": 5000 rows of randomly generated ID / EID strings.
# NOTE(review): the first EID component samples only 200 letters, so paste0()
# recycles it across the 5000 rows. This may be a typo for 5000; it is kept
# unchanged so the generated data stays the same.
set.seed(12)
table201901 <- as.data.table(
  data.frame(
    ID = paste0(
      sample(LETTERS, 5000, replace = TRUE),
      sample(0:9, 5000, replace = TRUE)
    ),
    EID = paste0(
      sample(LETTERS, 200, replace = TRUE),
      sample(letters, 5000, replace = TRUE),
      sample(1:9, 5000, replace = TRUE)
    )
  )
)

# Snapshot "201812": same layout, different seed.
set.seed(15)
table201812 <- as.data.table(
  data.frame(
    ID = paste0(
      sample(LETTERS, 5000, replace = TRUE),
      sample(0:9, 5000, replace = TRUE)
    ),
    EID = paste0(
      sample(LETTERS, 5000, replace = TRUE),
      sample(letters, 5000, replace = TRUE),
      sample(0:9, 5000, replace = TRUE)
    )
  )
)

# Case 1: both tables keyed on ID only.
# x[!i] is a not-join on the key column (ID), whereas fsetdiff() compares
# whole rows and, with its default all = FALSE, returns unique rows only.
setkeyv(table201901, "ID")
setkeyv(table201812, "ID")
table201901[!table201812]
fsetdiff(table201901, table201812)

# Case 2: both tables keyed on ID and EID.
# The not-join now matches on both columns but still keeps duplicated rows,
# while fsetdiff() (all = FALSE) still de-duplicates its result.
setkeyv(table201901, c("ID", "EID"))
setkeyv(table201812, c("ID", "EID"))
table201901[!table201812]
fsetdiff(table201901, table201812)
我的目标是得到一个表,其中的行存在于 table201901 中,但不存在于 table201812 中。
我原以为 `fsetdiff(table1, table2)` 和 `table1[!table2]` 会给出相同的输出,但实际上并非如此。
答案 0 :(得分:1)
以下示例说明了这两种方法。为简单起见,生成了两个小的虚拟data.tables,它们具有部分重叠的行:
library("data.table")

# Two small demo tables whose rows overlap only partially.
dt1 <- data.table(
  col1 = rep(c(1, 3, 3), each = 2),
  col2 = rep(c("only dt1", "match col2", "match row"), each = 2)
)
print(dt1)
#>    col1       col2
#> 1:    1   only dt1
#> 2:    1   only dt1
#> 3:    3 match col2
#> 4:    3 match col2
#> 5:    3  match row
#> 6:    3  match row

dt2 <- data.table(
  col1 = rep(c(3, 4, 5), each = 2),
  col2 = rep(c("match row", "match col2", "only dt2"), each = 2)
)
print(dt2)
#>    col1       col2
#> 1:    3  match row
#> 2:    3  match row
#> 3:    4 match col2
#> 4:    4 match col2
#> 5:    5   only dt2
#> 6:    5   only dt2

# Not-join on every column: keep each dt1 row (duplicates included) that has
# no identical row in dt2.
dt1[!dt2, on = c("col1", "col2")]
#>    col1       col2
#> 1:    1   only dt1
#> 2:    1   only dt1
#> 3:    3 match col2
#> 4:    3 match col2

# fsetdiff() with all = TRUE gives the same result for this data.
# NOTE(review): per the data.table docs, all = TRUE keeps max(0, xn - yn)
# copies of a row, so the two can differ when a matched row occurs more often
# in dt1 than in dt2.
fsetdiff(dt1, dt2, all = TRUE)
#>    col1       col2
#> 1:    1   only dt1
#> 2:    1   only dt1
#> 3:    3 match col2
#> 4:    3 match col2

# De-duplicated variant: unique dt1 rows that are absent from dt2.
unique(dt1[!dt2, on = c("col1", "col2")])
#>    col1       col2
#> 1:    1   only dt1
#> 2:    3 match col2

# fsetdiff() with all = FALSE (its default) drops duplicates in the same way.
fsetdiff(dt1, dt2, all = FALSE)
#>    col1       col2
#> 1:    1   only dt1
#> 2:    3 match col2

# Not-join on "col2" alone: drop every dt1 row whose col2 value appears in dt2,
# regardless of col1.
dt1[!dt2, on = "col2"]
#>    col1     col2
#> 1:    1 only dt1
#> 2:    1 only dt1
由reprex package(v0.3.0)于2019-06-28创建