有没有人知道如何实现以下目标:从这里开始
df <- data.frame(var = c(0,0,1,1,0,0,0,1,1,0,0,0,0,1,1))
并实现这一目标:
df <- data.frame(var = c(0,0,1,1,0,0,0,1,1,0,0,0,0,1,1),
newvar = c(0,0,1,1,0,0,0,2,2,0,0,0,0,3,3))
答案 0 :(得分:3)
以下是rle
的选项,可以替换&#39;值&#39;与这些值的序列不为0,然后调用inverse_rle
以获取完整的vector
df$newvar <- inverse.rle(within.list(rle(df$var),
values[values!=0] <- seq_along(values[values!=0])))
df
# var newvar
#1 0 0
#2 0 0
#3 1 1
#4 1 1
#5 0 0
#6 0 0
#7 0 0
#8 1 2
#9 1 2
#10 0 0
#11 0 0
#12 0 0
#13 0 0
#14 1 3
#15 1 3
答案 1 :(得分:2)
你可以试试以下:
df %>%
mutate(n=ifelse(var==lead(var,default = 0),1,0)) %>%
mutate(n2=ifelse(var==0,0,n)) %>%
mutate(res=ifelse(var==1, cumsum(n2),0))
var n n2 res
1 0 1 0 0
2 0 0 0 0
3 1 1 1 1
4 1 0 0 1
5 0 1 0 0
6 0 1 0 0
7 0 0 0 0
8 1 1 1 2
9 1 0 0 2
10 0 1 0 0
11 0 1 0 0
12 0 1 0 0
13 0 0 0 0
14 1 1 1 3
15 1 0 0 3
然后select(var, res)
只显示您需要的列。
答案 2 :(得分:1)
有效的解决方案:
df %>%
mutate(temp= var - lag(var,default=df$var[1])) %>%
mutate(newvar= var * cumsum(temp>0))
或没有附加栏:
df %>%
mutate(newvar= var - lag(var,default=df$var[1])) %>%
mutate(newvar= var * cumsum(newvar>0))
var temp newvar
1 0 0 0
2 0 0 0
3 1 1 1
4 1 0 1
5 0 -1 0
6 0 0 0
7 0 0 0
8 1 1 2
9 1 0 2
10 0 -1 0
11 0 0 0
12 0 0 0
13 0 0 0
14 1 1 3
15 1 0 3
答案 3 :(得分:1)
另一个有趣的事情:
library(data.table)
setDT(df)
tmp = 0
df[, newvar := if(var[1] != 0) tmp <- tmp + 1 else 0, by = rleid(var)][]
还有一个:
df[, newvar := var * cumsum(diff(c(0, var)) == 1)]
# or if still a data.frame
within(df, newvar <- var * cumsum(diff(c(0, var)) == 1))