我有一个数据框,其中有 2 个变量,包含大约 100K 个值。
我需要在数据框中添加一个增量计数器(incremental counter)
作为第三列。
以下是数据框:
**A** **B**
1 2
4 3
3 2
2 0
1 0
2 0
3 2
1 2
3 2
2 0
2 0
3 0
4 0
2 0
9 1
此外,计数器的递增需要满足以下条件:
a. 只有当 A>=1 且 B=0 时,计数器才会递增;
b. 满足条件 A>=1 且 B=0 的前 2 个数据点不应使计数器(counter)递增
(从下面的预期输出可以看出,每一段连续满足条件的数据都会重新跳过其前 2 个数据点,
而计数器的数值在各段之间持续累加)。
我的预期输出如下。
**A** **B** **Incremental Counter**
1 2 0
4 3 0
3 2 0
2 0 0
1 0 0
2 0 1
3 2 0
1 2 0
3 2 0
2 0 0
2 0 0
3 0 2
4 0 3
2 0 4
9 1 0
谢谢,
答案 0 :(得分:2)
假设条件为A >=1 & B ==0
,我们可以使用data.table
library(data.table)
# Convert df1 to a data.table so that columns can be added with `:=`.
setDT(df1)
# Temporary run id: consecutive rows with the same condition value share an id.
df1[, grp := rleid(A >= 1 & B == 0)]
# Row numbers (within the whole table) of rows that meet the condition and
# come after the first two rows of their run.
i1 <- df1[, .I[A >= 1 & B == 0 & seq_len(.N) > 2], by = grp]$V1
# Number the selected rows consecutively; all other rows are NA for now.
df1[i1, IncrementalCounter := seq_len(.N)]
# Turn the NA placeholders into 0, drop the helper column, and print.
df1[is.na(IncrementalCounter), IncrementalCounter := 0]
df1[, grp := NULL]
df1[]
# A B IncrementalCounter
# 1: 1 2 0
# 2: 4 3 0
# 3: 3 2 0
# 4: 2 0 0
# 5: 1 0 0
# 6: 2 0 1
# 7: 3 2 0
# 8: 1 2 0
# 9: 3 2 0
#10: 2 0 0
#11: 2 0 0
#12: 3 0 2
#13: 4 0 3
#14: 2 0 4
#15: 9 1 0
我们也可以使用base R
rle
# Run-length encode the condition so that every stretch of consecutive equal
# flags (TRUE = row qualifies, FALSE = it does not) becomes one run.
rl <- with(df1, rle(A >= 1 & B == 0))
# Position of each row inside its own run: 1, 2, 3, ... restarting per run.
pos <- sequence(rl$lengths)
# A row is counted only when it qualifies AND is past the first two rows of
# its run. Working from per-row positions (instead of moving 2 rows from each
# TRUE run to the preceding FALSE run) keeps the result the same length as
# the data even when the data starts with a qualifying run, and it does not
# fail when a qualifying run has fewer than 2 rows.
r2 <- inverse.rle(rl) & pos > 2
# Running total over the counted rows; multiplying by the flag zeroes the rest.
cumsum(r2) * r2
#[1] 0 0 0 0 0 1 0 0 0 0 0 2 3 4 0
# Sample data: two integer columns, 15 rows, default row names.
df1 <- data.frame(
  A = c(1L, 4L, 3L, 2L, 1L, 2L, 3L, 1L, 3L, 2L, 2L, 3L, 4L, 2L, 9L),
  B = c(2L, 3L, 2L, 0L, 0L, 0L, 2L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 1L)
)
答案 1 :(得分:1)
使用 base R:
# Rows that satisfy the counting condition (A >= 1 and B == 0).
indices <- df$A >= 1 & df$B == 0
# Flag of the preceding row; the first row has no predecessor, hence NA.
previous <- c(NA, head(indices, -1))
# A reset happens on every row where the flag drops from TRUE to FALSE.
reset.counter <- previous & !indices
# Segment id per row: it increases by one at each reset, so a segment holds
# at most one stretch of qualifying rows. An NA reset flag (first row) is
# treated as "no reset".
segment <- cumsum(!is.na(reset.counter) & reset.counter)
# Within each segment keep only the qualifying rows after the first two.
counted <- unlist(
  lapply(split(indices, segment), function(x) x & cumsum(x) > 2),
  use.names = FALSE
)
# Number the kept rows 1, 2, 3, ... across the whole data frame; every other
# row stays 0. seq_len() gives an empty vector when no row is kept, whereas
# seq(1, 0) would give c(1, 0).
indices <- integer(length(counted))
indices[counted] <- seq_len(sum(counted))
df$C <- indices
输出:
A B C
1 1 2 0
2 4 3 0
3 3 2 0
4 2 0 0
5 1 0 0
6 2 0 1
7 3 2 0
8 1 2 0
9 3 2 0
10 2 0 0
11 2 0 0
12 3 0 2
13 4 0 3
14 2 0 4
15 9 1 0