r - 仅为特定值创建序列

时间:2018-03-29 11:54:44

标签: r dataframe dplyr

有没有人知道如何实现以下目标:从这里开始

# Example input: a 0/1 indicator column; each line below is one run of
# identical values.
df <- data.frame(
  var = c(
    0, 0,
    1, 1,
    0, 0, 0,
    1, 1,
    0, 0, 0, 0,
    1, 1
  )
)

并实现这一目标:

# Desired result: newvar numbers each run of 1s in var (1, 2, 3, ...) and is
# 0 wherever var is 0.
df <- data.frame(
  var    = c(0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1),
  newvar = c(0, 0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 0, 0, 3, 3)
)

4 个答案:

答案 0 :(得分:3)

下面是一个使用 rle 的方案:把游程编码结果中不为 0 的 "values" 替换为它们的序号(1、2、3……),然后调用 inverse.rle 还原出完整的向量

# Run-length encode var, renumber the non-zero runs 1, 2, 3, ... and expand
# the encoding back to one value per row. local() keeps the helper variables
# out of the calling environment.
df$newvar <- local({
  runs <- rle(df$var)
  is_run <- runs$values != 0
  runs$values[is_run] <- seq_len(sum(is_run))
  inverse.rle(runs)
})
df
#   var newvar
#1    0      0
#2    0      0
#3    1      1
#4    1      1
#5    0      0
#6    0      0
#7    0      0
#8    1      2
#9    1      2
#10   0      0
#11   0      0
#12   0      0
#13   0      0
#14   1      3
#15   1      3

答案 1 :(得分:2)

你可以试试以下:

# Number each run of 1s in var.
#   n   : 1 on rows where var differs from the previous row (0 assumed before
#         the first row), else 0
#   n2  : n restricted to rows where var is 1, i.e. the first row of each run
#   res : running count of run starts on rows where var is 1, 0 elsewhere
# Flagging the start of a run (lag) rather than "next row is equal" (lead)
# keeps the numbering correct for runs of any length, including a single 1.
df %>%
  mutate(n = ifelse(var != lag(var, default = 0), 1, 0)) %>%
  mutate(n2 = ifelse(var == 1, n, 0)) %>%
  mutate(res = ifelse(var == 1, cumsum(n2), 0))
#    var n n2 res
# 1    0 0  0   0
# 2    0 0  0   0
# 3    1 1  1   1
# 4    1 0  0   1
# 5    0 1  0   0
# 6    0 0  0   0
# 7    0 0  0   0
# 8    1 1  1   2
# 9    1 0  0   2
# 10   0 1  0   0
# 11   0 0  0   0
# 12   0 0  0   0
# 13   0 0  0   0
# 14   1 1  1   3
# 15   1 0  0   3

然后select(var, res)只显示您需要的列。

答案 2 :(得分:1)

有效的解决方案:

# Number each run of 1s (assumes var holds only 0 and 1).
# temp is the change from the previous row: 1 where a run of 1s starts and
# -1 where it ends. The lag default is 0, not the first value of var, so a
# run that begins on the very first row is counted as well.
df %>%
   mutate(temp = var - lag(var, default = 0)) %>%
   mutate(newvar = var * cumsum(temp > 0))

或者不创建额外的列:

# Run numbering without a helper column (assumes var holds only 0 and 1):
# newvar first holds the row-to-row change in var and is then overwritten
# with the run index. The lag default is 0, not the first value of var, so a
# run of 1s that begins on the first row is counted as well.
df %>%
  mutate(newvar = var - lag(var, default = 0)) %>%
  mutate(newvar = var * cumsum(newvar > 0))

   var temp newvar
1    0    0      0
2    0    0      0
3    1    1      1
4    1    0      1
5    0   -1      0
6    0    0      0
7    0    0      0
8    1    1      2
9    1    0      2
10   0   -1      0
11   0    0      0
12   0    0      0
13   0    0      0
14   1    1      3
15   1    0      3

答案 3 :(得分:1)

另一个有趣的事情:

# data.table provides setDT(), `:=` and rleid() used below.
library(data.table)
# Turn df into a data.table so the df[, j, by] syntax below applies.
setDT(df)

# Running count of non-zero runs numbered so far.
tmp = 0
# Group rows into consecutive runs of equal var (rleid). For a run whose first
# value is non-zero, bump tmp and use the new count as that run's newvar;
# runs of zeros get 0. The trailing [] prints the updated table.
# NOTE(review): this depends on the `tmp <- tmp + 1` assignment made inside j
# carrying over from one group to the next -- confirm against data.table docs.
df[, newvar := if(var[1] != 0) tmp <- tmp + 1 else 0, by = rleid(var)][]

还有一个:

# data.table variant: flag every 0 -> 1 step, count the flags cumulatively,
# and multiply by var so rows outside a run of 1s stay 0.
df[, newvar := {
  run_start <- diff(c(0, var)) == 1
  cumsum(run_start) * var
}]
# same computation if df is still a plain data.frame
within(df, newvar <- cumsum(diff(c(0, var)) == 1) * var)