在不使用for循环的情况下在R中实现增量计数器

时间:2017-07-18 05:55:03

标签: r

我有一个数据框,其中有2个变量,每个变量约有100K个值。我需要在数据框中添加一个增量计数器(incremental counter)作为第三列。数据框如下:

    **A**               **B**       
    1                     2          
    4                     3                   
    3                     2                   
    2                     0                   
    1                     0                   
    2                     0                   
    3                     2                   
    1                     2                   
    3                     2                     
    2                     0                   
    2                     0                   
    3                     0                   
    4                     0                      
    2                     0                   
    9                     1                  

此外,在实现计数器之前需要满足以下条件:
a. 只有当 A>=1 且 B=0 时,计数器才会递增。 b. 在满足条件 A>=1 且 B=0 的数据点中,前2个不应使计数器递增。

我的预期输出如下。

    **A**               **B**      **Incremental Counter  
    1                     2                 0    
    4                     3                 0  
    3                     2                 0  
    2                     0                 0  
    1                     0                 0  
    2                     0                 1  
    3                     2                 0  
    1                     2                 0  
    3                     2                 0    
    2                     0                 0  
    2                     0                 0  
    3                     0                 2  
    4                     0                 3     
    2                     0                 4  
    9                     1                 0   

谢谢,

2 个答案:

答案 0 :(得分:2)

假设条件为A >=1 & B ==0,我们可以使用data.table

library(data.table)
# Convert df1 to a data.table in place.
setDT(df1)
# Give every run of consecutive rows with the same condition value its own id.
df1[, grp := rleid(A >= 1 & B == 0)]
# Row numbers that satisfy the condition and are past the 2nd row of their run.
i1 <- df1[, .I[A >= 1 & B == 0 & seq_len(.N) > 2], by = grp]$V1
# Number those rows 1, 2, ... over the whole table.
df1[i1, IncrementalCounter := seq_len(.N)]
# Every row that was not numbered gets 0.
df1[is.na(IncrementalCounter), IncrementalCounter := 0]
# Drop the helper column and print the result.
df1[, grp := NULL]
df1[]
#    A B IncrementalCounter
# 1: 1 2                  0
# 2: 4 3                  0
# 3: 3 2                  0
# 4: 2 0                  0
# 5: 1 0                  0
# 6: 2 0                  1
# 7: 3 2                  0
# 8: 1 2                  0
# 9: 3 2                  0
#10: 2 0                  0
#11: 2 0                  0
#12: 3 0                  2
#13: 4 0                  3
#14: 2 0                  4
#15: 9 1                  0

我们也可以使用 base R 中的 rle 来处理
# Run-length encode the condition "A >= 1 and B == 0" over the rows of df1.
rl <- with(df1, rle(A >= 1 & B == 0))

# r2 is TRUE on the rows where the counter advances: rows that satisfy the
# condition and are at least the 3rd row of their run. sequence() yields the
# position of each row inside its run. Masking by position, rather than moving
# two elements between neighbouring run lengths, also covers a qualifying run
# shorter than two rows and a qualifying run that starts at the first row.
r2 <- inverse.rle(rl) & sequence(rl$lengths) > 2

# Running count on the TRUE rows; multiplying by r2 zeroes all the other rows.
cumsum(r2) * r2
#[1] 0 0 0 0 0 1 0 0 0 0 0 2 3 4 0

数据

# Example data from the question: 15 rows, two integer columns A and B.
df1 <- data.frame(
  A = c(1L, 4L, 3L, 2L, 1L, 2L, 3L, 1L, 3L, 2L, 2L, 3L, 4L, 2L, 9L),
  B = c(2L, 3L, 2L, 0L, 0L, 0L, 2L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 1L)
)

答案 1 :(得分:1)

使用 base R:

# Rows on which the counter is allowed to advance.
indices <- df$A >= 1 & df$B == 0
# The same flags shifted down by one row; the first row has no predecessor.
previous <- c(NA, head(indices, -1))
# TRUE on a non-qualifying row that directly follows a qualifying one, i.e.
# the row after which the "skip the first two" rule starts over.
reset.counter <- !indices & previous
# Cut the flags into pieces that each begin at a reset row; which() ignores
# the NA produced for the first row.
indices <- unname(split(
  indices,
  cumsum(seq_along(indices) %in% which(reset.counter))
))
# Inside each piece keep only the qualifying rows after the first two.
indices <- unlist(lapply(indices, function(x) cumsum(x) > 2 & x))
# Number the kept rows 1, 2, ... down the whole column; the other rows
# become 0. seq_len() is used because seq(1, n) counts down to c(1, 0)
# when n is 0.
indices[indices] <- seq_len(sum(indices))
df$C <- indices

输出:

   A B C
1  1 2 0
2  4 3 0
3  3 2 0
4  2 0 0
5  1 0 0
6  2 0 1
7  3 2 0
8  1 2 0
9  3 2 0
10 2 0 0
11 2 0 0
12 3 0 2
13 4 0 3
14 2 0 4
15 9 1 0