有没有更快的方法来计算一个向量与列表中每个元素的交集长度?
# Query vector: the values to look for.
v <- c("b", "l")
# Candidate character vectors, one per list element.
l <- list(
  c("a", "b", "c"),
  c("d", "b", "a"),
  c("m", "l", "h", "b")
)
# Baseline approach: size of the set intersection of `v` with each element of `l`.
unlist(lapply(l, function(elem) length(intersect(v, elem))))
答案 0 :(得分:1)
您可以尝试
# For each element of `l`, count how many distinct values of `v` occur in it.
# The distinct values of `v` are tested against `x` (rather than `x` against
# `v`) so that repeated values in either input are counted once; this keeps the
# result equal to length(intersect(v, x)).
vapply(l, function(x) sum(unique(v) %in% x), numeric(1))
或者
library(stringi)
# Turn the list into a character matrix, flag the cells whose value is found
# in `v` (restoring the matrix shape that %in% drops), and sum each column.
m1 <- stri_list2matrix(l)
colSums(array(m1 %in% v, dim = dim(m1)))
# rapply() variant: apply the intersection-size computation to every leaf
# vector of the global list `l`, comparing against the global vector `v`,
# and return the results unlisted.
fr <- function() {
  rapply(l, function(leaf) length(intersect(v, leaf)), how = "unlist")
}
# lapply() variant: compute the intersection size of the global vector `v`
# with each element of the global list `l`, then flatten the list of results.
fl <- function() {
  sizes <- lapply(l, function(elem) length(intersect(v, elem)))
  unlist(sizes)
}
# vapply() variant: same computation as the lapply() version, but every
# per-element result is declared to be a single integer.
fv <- function() {
  shared_count <- function(elem) length(intersect(v, elem))
  vapply(l, shared_count, FUN.VALUE = integer(1))
}
# Membership-test variant: for each element of the global list `l`, count how
# many distinct values of the global vector `v` occur in it.
#
# The distinct values of `v` are tested against `x` (not `x` against `v`) so
# that repeated values in either input are counted once, which keeps the
# result equal to length(intersect(v, x)).
#
# Returns a double vector with one entry per element of `l`.
fis <- function() {
  needles <- unique(v)  # loop-invariant, so computed once
  vapply(l, function(x) sum(needles %in% x), numeric(1))
}
library(stringi)
# Matrix variant: convert the global list `l` into a character matrix with
# stringi, flag the cells whose value is found in the global vector `v`
# (restoring the matrix shape that %in% drops), and sum each column.
fN <- function() {
  padded <- stri_list2matrix(l)
  hits <- array(padded %in% v, dim = dim(padded))
  colSums(hits)
}
# Fix the RNG seed so the generated benchmark data are reproducible.
set.seed(29)
# Lookup vector: 20 letters drawn without replacement, pasted onto 1..100.
v <- paste0(sample(letters, size = 20, replace = FALSE), 1:100)
# List of 1e5 character vectors: letters drawn from a-t, pasted onto 1..50.
# NOTE(review): `sample(10)` (a whole permutation of 1:10) is passed as `size`;
# presumably `sample(10, 1)` was intended. Confirm before changing, because
# doing so alters the random stream and therefore the generated data.
l <- replicate(
  1e5,
  {
    drawn <- sample(letters[1:20], size = sample(10), replace = TRUE)
    paste0(drawn, 1:50)
  },
  simplify = FALSE
)
# Time each variant once on the benchmark data. The commented lines after
# every call are the output recorded when the benchmark was originally run.

# rapply() variant (result discarded).
system.time(fr())
# user system elapsed
# 2.914 0.006 2.927
# unlist(lapply()) variant (result discarded).
system.time(fl())
# user system elapsed
# 3.023 0.008 3.036
# Membership-test variant; result kept in `res1` for the comparison below.
system.time(res1 <- fis())
# user system elapsed
# 0.804 0.003 0.809
# vapply() + intersect() variant; result kept in `res2`.
system.time(res2 <- fv())
# user system elapsed
# 3.285 0.006 3.301
# stringi matrix variant; result kept in `res3`.
system.time(res3 <- fN())
# user system elapsed
# 0.380 0.016 0.396
# Check that the three stored results agree on this data set.
all.equal(res1, res2)
#[1] TRUE
all.equal(res1, res3)
#[1] TRUE
从 system.time 的结果中挑选几种方法,再用 microbenchmark 进行基准测试:
library(microbenchmark)
# Run each selected variant 20 times and report timings in relative units.
# The commented table below is the output recorded from the original run.
microbenchmark(
  fv(),
  fis(),
  fN(),
  times = 20L,
  unit = "relative"
)
#Unit: relative
#expr min lq mean median uq max neval cld
# fv() 14.665591 13.534939 13.474595 12.224042 14.058256 15.172026 20 c
# fis() 3.442919 3.261627 4.161515 2.930333 3.440015 8.006546 20 b
# fN() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 20 a