我有一个逻辑向量
as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
但是更长。我如何将其转换为:
c(0,0,1,2,3,0,1,2,0,0,0,1,2,3,4)
通过计算一个的长度?
答案 0 :(得分:7)
另一个rle
选项:
r <- rle(x)
x[x] <- sequence(r$l[r$v])
#[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
或者不保存r
:
x[x] <- sequence(with(rle(x), lengths[values]))
答案 1 :(得分:4)
使用C ++通过Rcpp
library(Rcpp)
cppFunction('NumericVector seqOfLogical(LogicalVector lv) {
size_t n = lv.size();
NumericVector res(n);
int foundCounter = 0;
for (size_t i = 0; i < n; i++) {
if (lv[i] == 1) {
foundCounter++;
} else {
foundCounter = 0;
}
res[i] = foundCounter;
}
return res;
}')
seqOfLogical(x)
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
library(microbenchmark)
set.seed(1)
x <- sample(c(T,F), size = 1e6, replace = T)
microbenchmark(
symbolix = { symbolix(x) },
thelatemail1 = { thelatemail1(x) },
thelatemail2 = { thelatemail2(x) },
wen = { wen(x) },
maurits = { maurits(x) },
#mhammer = { mhammer(x) }, ## this errors
times = 5
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# symbolix 2.760152 4.579596 34.60909 4.833333 22.31126 138.5611 5
# thelatemail1 154.050925 189.784368 235.16431 235.982093 262.33704 333.6671 5
# thelatemail2 138.876834 146.197278 158.66718 148.547708 179.80223 179.9119 5
# wen 780.432786 898.505231 1091.39099 1093.702177 1279.33318 1404.9816 5
# maurits 1002.267323 1043.590621 1136.35624 1086.967756 1271.38803 1277.5675 5
symbolix <- function(x) {
seqOfLogical(x)
}
thelatemail1 <- function(x) {
r <- rle(x)
x[x] <- sequence(r$l[r$v])
return(x)
}
thelatemail2 <- function(x) {
x[x] <- sequence(with(rle(x), lengths[values]))
return(x)
}
maurits <- function(x) {
unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values))
}
wen <- function(A) {
B=data.table::rleid(A)
B=ave(B,B,FUN = seq_along)
B[!A]=0
B
}
mhammer <- function(x) {
x_counts <- x
for(i in seq_along(x)) {
if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] }
}
return(x_counts)
}
答案 2 :(得分:1)
您可以在create
中使用rleid
data.table
答案 3 :(得分:0)
x <- c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1)
x_counts <- x
for(i in seq_along(x)) {
if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] }
}
x_counts
答案 4 :(得分:0)
这是将基数R的rle
与Map
一起使用的解决方案
x <- as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values))
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
或使用purrr::pmap
library(purrr);
unlist(pmap(unclass(rle(x)),
function(lengths, values) if (!isTRUE(values)) rep(0, lengths) else 1:lengths))
#[1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
答案 5 :(得分:0)
与Wen的稍有不同,我想到了:
library(data.table)
ave(v,rleid(v),FUN=function(x) x *seq_along(x))
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
答案 6 :(得分:0)
我建议使用runner软件包和函数streak_run
来计算连续出现的次数。也可以在滑动窗口上进行计算(例如,最后5个观测值),更多信息请参见github documentation
x <- as.logical(c(0,0,1,1,1,0,1,1,0,0,0,1,1,1,1))
streak <- streak_run(x)
streak[x == 0] <- 0
print(streak)
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4