在满足条件时开始计数,直到下一次满足条件时重新计数

时间:2017-07-26 22:55:40

标签: r

我的数据框中有一些数据。我希望在某一行满足条件时开始计数,并一直计数到下一次满足条件为止。举个例子:当数据框中出现一个小于 -1(即 `< -1`)的数据点时,计数器从 1 开始,然后依次递增:2、3、4、5、6、7、8、9、10 等;遇到下一个小于 -1 的值时,计数器重新从 1 开始计数(1、2、3、4、5、6、7、8 等),如此重复,直到再下一个小于 -1 的值。

下面是一个示例数据框,其中包含所需的输出:

 roll.z.score.n3 sig.count    
1           NA          0          
2           NA          0          
3   -1.135974424        1          
4   0.193311168         2          
5   0.714285714         3          
6   -1.148753543        1          
7   -0.942160394        2          
8   0.695763683         3          
9   1.140646864         4          
10  0.985196899         5          
11  -0.768766574        6          
12  -1.011293858        1          
13  -0.516703612        2          
14  -1.120897077        1          
15  1.091089451         2          
16  0.968364052         3          
17  0.872871561         4          
18  1.099524999         5          
19  0.918397948         6  

3 个答案:

答案 0 :(得分:1)

这是一个使用dplyr包中的函数和data.table包中的rleid函数的解决方案。 dt2是最终输出。

# Load packages
library(dplyr)

# Build `sig.count`: a counter that restarts at 1 on every row where
# roll.z.score.n3 < -1 and keeps increasing until the next such row.
# Rows whose score is NA get 0. `dt2` is the final output.
#
# Groups are numbered with cumsum() over the match flag rather than with the
# data.table::rleid() run-parity trick: the parity trick only works when the
# data start with a non-matching run and matches never occur back to back,
# whereas cumsum() opens a new group on every match regardless.
dt2 <- dt %>%
  # NA < -1 is NA, so guard with is.na() to keep the flag strictly TRUE/FALSE
  mutate(is_match = !is.na(roll.z.score.n3) & roll.z.score.n3 < -1) %>%
  # Group 0 holds the rows before the first match; group k starts at match k
  mutate(Group = cumsum(is_match)) %>%
  group_by(Group) %>%
  # seq_len(n()) numbers the rows of each group from 1
  mutate(sig.count = ifelse(is.na(roll.z.score.n3), 0, seq_len(n()))) %>%
  ungroup() %>%
  select(roll.z.score.n3, sig.count)

输入数据

# Example input from the question: a single column of rolling z-scores that
# starts with two NA values, with row names "1" through "19".
dt <- data.frame(
  roll.z.score.n3 = c(
    NA, NA, -1.135974424, 0.193311168, 0.714285714,
    -1.148753543, -0.942160394, 0.695763683, 1.140646864, 0.985196899,
    -0.768766574, -1.011293858, -0.516703612, -1.120897077, 1.091089451,
    0.968364052, 0.872871561, 1.099524999, 0.918397948
  ),
  row.names = as.character(1:19)
)

答案 1 :(得分:1)

# General version: find the positions of the values below -1 in each column
# of `dt`, then number the rows of every segment between matches from 1.
# unlist(lapply()) always gives a plain integer vector, whereas the return
# type of sapply() depends on what was matched.
a <- unlist(lapply(dt, function(col) which(col < -1)), use.names = FALSE)
# diff() turns the match positions into segment lengths: the rows before the
# first match, then one segment per match. seq_len() is used instead of 1:i
# because a match on the first row gives a zero-length leading segment, and
# 1:0 would expand to c(1, 0).
b <- cbind.data.frame(
  dt,
  count = unlist(lapply(diff(c(1, a, nrow(dt) + 1)), seq_len))
)
# Rows with a missing score are reported as 0
b$count[is.na(b$roll.z.score.n3)] <- 0

或者,您也可以使用下面这个专门针对本数据的写法;上面的是通用写法:

# Data-specific version: segment lengths between consecutive values below -1
# in roll.z.score.n3. The upper bound uses nrow(dt); `b` is only created on
# the next line, so it must not be read here.
a <- with(dt, diff(c(1, which(roll.z.score.n3 < -1), nrow(dt) + 1)))
# Number the rows of each segment from 1. seq_len() copes with a zero-length
# leading segment (match on the first row), where 1:0 would give c(1, 0).
b <- cbind.data.frame(dt, count = unlist(lapply(a, seq_len)))
# Rows with a missing score are reported as 0
b$count[is.na(b$roll.z.score.n3)] <- 0

答案 2 :(得分:0)

快速演示,不确定它是否符合您的要求:

# Quick demo on simulated data: 20 draws from N(mean = -1, sd = 1), so that
# values below -1 are common.
set.seed(63112)
x <- rnorm(20, -1, 1)
df <- data.frame(x = x, count = NA)

# `counter` is the running count since the last value below -1;
# `seen_neg_one` stays FALSE until the first such value has been met.
counter <- 0
seen_neg_one <- FALSE
for (i in seq_len(nrow(df))) {
  if (is.na(df$x[i])) {
    # Test for NA first: `NA < -1` is NA, and if () stops with an error on an
    # NA condition, so this branch has to come before the comparison.
    df$count[i] <- 0
  } else if (df$x[i] < -1) {
    # A match restarts the counter at 1
    counter <- 1
    df$count[i] <- 1
    seen_neg_one <- TRUE
  } else if (!seen_neg_one) {
    # Nothing to count yet before the first match
    df$count[i] <- 0
  } else {
    counter <- counter + 1
    df$count[i] <- counter
  }
}