我有一个包含重复值的向量:
v = "3,600" "3,600" "3,600" "3,600" "3,600" "3,600" "3,600" "3,600" "3,400" "3,400" "3,400" "3,600" "3,600" "3,600"
要找出哪些元素是重复的、每个元素重复了多少次、以及它们各自出现在哪些位置,正确的方法是什么? 我想要如下形式的输出:
var1:"3,600" rep : 11 position: 1:8, 12:14
var2:"3,400" rep : 3 position: 9:11
答案 0 :(得分:2)
# For every distinct value in v, build a one-row data.frame holding the value,
# its number of occurrences, and its positions written as "start:end" runs.
# sapply() then binds those rows into a matrix with one column per value.
sapply(unique(v), function(x) {
  idx <- which(v == x)
  # A new run starts wherever two consecutive positions are not adjacent.
  run_id <- cumsum(c(1, diff(idx) != 1))
  run_labels <- vapply(
    split(idx, run_id),
    function(y) paste(range(y), collapse = ":"),
    character(1)
  )
  data.frame(
    variable = x,
    rep = sum(x == v),
    position = paste(run_labels, collapse = ", "),
    stringsAsFactors = FALSE
  )
})
# 3,600 3,400
#variable "3,600" "3,400"
#rep 11 3
#position "1:8, 12:14" "9:11"
答案 1 :(得分:1)
我们可以使用split
# Group the positions of v by the value stored at each position.
lst <- split(x = seq_along(v), f = v)
# Size of each group, i.e. how many times each distinct value occurs.
lengths(lst)
# 3,400 3,600
# 3 11
如果我们需要OP的帖子中显示的输出
library(data.table)
# Step 1: pair each element of v with its position.
# Step 2: per run of consecutive equal values (rleid), keep the value, the run
#         length, and the run's span written as "first:last".
# Step 3: per value, add up the run lengths and join the spans with ", ".
data.table(v, idx = seq_along(v))[
  , .(var = v[1], rep = .N, position = paste(idx[1], idx[.N], sep = ":")),
  by = .(rleid(v))
][
  , .(rep = sum(rep), position = toString(position)),
  by = var
]
# var rep position
#1: 3,600 11 1:8, 12:14
#2: 3,400 3 9:11
# Example input: eight "3,600", then three "3,400", then three "3,600".
v <- rep(c("3,600", "3,400", "3,600"), times = c(8L, 3L, 3L))