我有以下数据集,希望使用一种能够覆盖所有可能组合的循环策略,来比较它们所包含元素的相似性(即 "setA, setB, setC, setD"; "setA, setB, setC"; "setA, setB"; "setB, setC, setD"; "setC, setD"; "setB, setD" 等)。
数据集:
# Four example sets of animal names (character vectors); several names occur
# in more than one set.
# NOTE(review): "dunkey" (in setA and setC) is possibly a typo for "donkey" --
# kept verbatim because the sample output further down prints it as-is.
setA <- c("dog", "cat", "cow", "sheep", "dunkey")
setB <- c("fox", "cat", "cow", "snake")
setC <- c("dog", "cat", "cow", "sheep", "dunkey", "fox", "python")
setD <- c("dog", "cat", "lion", "sheep", "elephant", "fox")
不确定如何在R中对此进行编码,但这是我未能产生预期结果的尝试:
# Return the elements common to every vector passed in `...`, folding
# intersect() over the arguments from left to right.
similar <- function(...) {
  groups <- list(...)
  Reduce(f = intersect, x = groups)
}
# Gather the four sets into one unnamed list.
allSets <- list(setA, setB, setC, setD)
# NOTE(review): this loop does not yield the combination-wise intersections:
#   - every similar() result is discarded (nothing is assigned, and values
#     are not auto-printed inside a for loop);
#   - each call receives a single argument, so Reduce() never intersects
#     two sets with each other;
#   - allSets[i-1] etc. use single brackets, so they pass a sub-list; a zero
#     index selects nothing and a negative index drops that element.
for(i in 1:length(allSets)){
similar(allSets[[i]])
similar(allSets[i-1])
similar(allSets[i-2])
similar(allSets[i-3])
}
任何人都可以帮忙吗?
答案 0 :(得分:2)
以下是来自之前一篇帖子(previous post)的函数,用于获取所有交集:
## Build every multi-set intersection of a named list of sets.
##
## Arguments:
##   sets  Named list of vectors. Every element needs a non-empty name,
##         because result keys are built by joining these names with ".".
##   out   Accumulator list used by the recursive calls; callers normally
##         omit it.
##
## Returns a named list with one entry per combination of two or more sets
## (e.g. "setA.setB", "setA.setB.setC"), each holding the elements common to
## that combination. With fewer than two sets, 'out' is returned unchanged
## (NULL for a top-level call).
intersects <- function(sets, out = NULL) {
  if (length(sets) < 2) return(out) # nothing left to pair up

  set_names <- names(sets)
  if (is.null(set_names) || anyNA(set_names) || !all(nzchar(set_names))) {
    # Keys are derived from the names; without them results cannot be labelled
    stop("'sets' must be a list in which every element has a non-empty name",
         call. = FALSE)
  }

  # is.null() rather than missing(): an explicit out = NULL must also start
  # from an empty list, otherwise `[[<-` below could build an atomic vector
  if (is.null(out)) out <- list()

  positions <- seq_along(sets)
  for (idx in combn(length(sets), 2, simplify = FALSE)) { # 2-way combinations
    key <- paste(set_names[idx], collapse = ".")
    out[[key]] <- intersect(sets[[idx[1]]], sets[[idx[2]]])
    # combn() yields idx[1] < idx[2], so only the sets located after idx[2]
    # remain to be combined with this intersection
    remaining <- sets[positions > idx[2]]
    out <- intersects(c(out[key], remaining), out = out)
  }
  out
}
## Collect setA..setD by name into a named list, then print every intersection
sets <- mget(c("setA", "setB", "setC", "setD"))
intersects(sets)
# $setA.setB
# [1] "cat" "cow"
#
# $setA.setB.setC
# [1] "cat" "cow"
#
# $setA.setB.setC.setD
# [1] "cat"
#
# $setA.setB.setD
# [1] "cat"
#
# $setC.setD
# [1] "dog" "cat" "sheep" "fox"
#
# $setA.setC
# [1] "dog" "cat" "cow" "sheep" "dunkey"
#
# $setA.setC.setD
# [1] "dog" "cat" "sheep"
#
# $setA.setD
# [1] "dog" "cat" "sheep"
#
# $setB.setC
# [1] "fox" "cat" "cow"
#
# $setB.setC.setD
# [1] "fox" "cat"
#
# $setB.setD
# [1] "fox" "cat"