我在数据框中有一些数据,希望在某个条件匹配时开始计数,直到下一次条件匹配为止。举个例子,当数据框中出现一个小于 -1(即 < -1)的数据点时,计数器从 1 开始,然后依次向上计数:2、3、4、5、6、7、8、9、10 等,直到出现下一个 < -1 的数据点;此时计数器重新从 1 开始计数(1、2、3、4、5、6、7、8 等),直到再下一个 < -1 的匹配。
下面是一个示例数据框,其中包含所需的输出:
roll.z.score.n3 sig.count
1 NA 0
2 NA 0
3 -1.135974424 1
4 0.193311168 2
5 0.714285714 3
6 -1.148753543 1
7 -0.942160394 2
8 0.695763683 3
9 1.140646864 4
10 0.985196899 5
11 -0.768766574 6
12 -1.011293858 1
13 -0.516703612 2
14 -1.120897077 1
15 1.091089451 2
16 0.968364052 3
17 0.872871561 4
18 1.099524999 5
19 0.918397948 6
答案 0 :(得分:1)
这是一个使用dplyr包中的函数和data.table包中的rleid函数的解决方案。 dt2是最终输出。
# Load packages
library(dplyr)

# Process the data
# Every value below -1 opens a new group: cumsum() over the match flag gives
# each row the number of matches seen so far. Unlike a run-length id
# (data.table::rleid) combined with an odd/even trick, this also restarts the
# counter when two matches are adjacent, and it does not depend on whether the
# data begins with a match.
dt2 <- dt %>%
  mutate(
    small_minus_1 = ifelse(roll.z.score.n3 < -1, 1, 0),
    # A missing score counts as "no match" when building the groups
    Group = cumsum(coalesce(small_minus_1, 0))
  ) %>%
  group_by(Group) %>%
  # Rows with a missing score, and rows before the first match (Group 0),
  # get 0; every other row gets its position within the current group.
  mutate(
    sig.count = ifelse(is.na(small_minus_1) | Group == 0, 0L, row_number())
  ) %>%
  ungroup() %>%
  select(roll.z.score.n3, sig.count)
# Sample data: the rolling z-scores from the question (the first two values
# are missing). Row names are stored as the character strings "1" to "19".
dt <- data.frame(
  roll.z.score.n3 = c(
    NA, NA, -1.135974424, 0.193311168, 0.714285714,
    -1.148753543, -0.942160394, 0.695763683, 1.140646864, 0.985196899,
    -0.768766574, -1.011293858, -0.516703612, -1.120897077, 1.091089451,
    0.968364052, 0.872871561, 1.099524999, 0.918397948
  ),
  row.names = as.character(1:19)
)
答案 1 :(得分:1)
# Row positions of every value below -1. lapply() + unlist() always yields a
# plain integer vector, whereas sapply() may return a matrix or a list.
a <- unlist(lapply(dt, function(col) which(col < -1)), use.names = FALSE)
# Segment lengths: the rows before the first match, then from each match up to
# the row before the next one (the last segment runs to the end of the data).
seg_len <- diff(c(1, a, nrow(dt) + 1))
# seq_len() instead of 1:i, so a zero-length leading segment (a match in the
# very first row) contributes nothing rather than c(1, 0).
b <- cbind.data.frame(dt, count = unlist(lapply(seg_len, seq_len)))
# Rows with a missing score do not take part in the count
b$count[is.na(b$roll.z.score.n3)] <- 0
或者,您也可以使用下面这段专门针对此数据的写法(上面的写法更为通用):
# Segment lengths between successive values below -1. The upper bound uses
# nrow(dt): b does not exist until the next statement creates it.
a <- with(dt, diff(c(1, which(roll.z.score.n3 < -1), nrow(dt) + 1)))
# lapply() keeps one vector per segment even when all segments have the same
# length (sapply() would simplify those to a matrix); seq_len() handles a
# zero-length leading segment, where 1:i would give c(1, 0).
b <- cbind.data.frame(dt, count = unlist(lapply(a, seq_len)))
# Rows with a missing score do not take part in the count
b$count[is.na(b$roll.z.score.n3)] <- 0
答案 2 :(得分:0)
快速演示,不确定它是否符合您的要求:
# Demo data: 20 normal draws centred on the -1 threshold
set.seed(63112)
x <- rnorm(20, -1, 1)
df <- data.frame(x = x, count = NA)

counter <- 0            # position within the current run
seen_neg_one <- FALSE   # becomes TRUE once the first value below -1 is met

for (i in seq_len(nrow(df))) {
  if (is.na(df$x[i])) {
    # Test for NA first: `NA < -1` is NA, and if () stops with an error on it
    df$count[i] <- 0
  } else if (df$x[i] < -1) {
    # A value below -1 restarts the counter at 1
    counter <- 1
    df$count[i] <- 1
    seen_neg_one <- TRUE
  } else if (!seen_neg_one) {
    # Nothing has matched yet, so there is nothing to count from
    df$count[i] <- 0
  } else {
    counter <- counter + 1
    df$count[i] <- counter
  }
}