id bleed episodes
J1 0 0
J1 0 1
J1 0 1
J1 yes 0
J2 0 0
J2 0 1
J2 0 1
J2 0 1
J2 yes 0
J2 0 0
J3 0 1
J3 0 1
J3 0 0
J3 0 1
J3 yes 0
J3 0 0
我想折叠这些数据,计算每个人在首次出血(bleed)发生之前的发作次数(episodes),得到如下所示的结果:
id episodes
J1 2
J2 3
J3 3
观察是在不同时间进行的(每周一次);我没有在数据中包括时间变量。
答案 0 :(得分:3)
使用此示例输入
# Sample input for the dplyr answer: 16 observations across three ids.
# `id` and `bleed` are factors (bleed levels: "0", "yes"); `episodes` is a
# 0/1 integer column.
dd <- data.frame(
  id = factor(rep(c("J1", "J2", "J3"), times = c(4L, 6L, 6L))),
  bleed = factor(
    c(
      "0", "0", "0", "yes",
      "0", "0", "0", "0", "yes", "0",
      "0", "0", "0", "0", "yes", "0"
    ),
    levels = c("0", "yes")
  ),
  episodes = c(
    0L, 1L, 1L, 0L,
    0L, 1L, 1L, 1L, 0L, 0L,
    1L, 1L, 0L, 1L, 0L, 0L
  )
)
您可以使用dplyr
library(dplyr)

# Per id, total the episodes observed before that id's first bleed.
# cumsum(bleed == "yes") is 0 on every row ahead of the first "yes", so it
# marks the pre-bleed rows; which() drops NA comparisons, just as filter()
# would. Subsetting inside summarize() rather than filter()-ing rows first
# keeps an id whose very first row is a bleed: it is reported with 0 episodes
# instead of vanishing from the result.
dd %>%
  group_by(id) %>%
  summarize(episodes = sum(episodes[which(cumsum(bleed == "yes") == 0)]))
对布尔值使用cumsum()来跟踪每个人是否已经发生过出血,然后只对第一次出血之前的episodes值求和。
答案 1 :(得分:0)
我们也可以使用data.table
library(data.table)

# Sample input. It is built before it is used so the snippet runs from top to
# bottom. All columns are plain character/integer vectors (no factors).
df1 <- data.frame(
  id = rep(c("J1", "J2", "J3"), times = c(4L, 6L, 6L)),
  bleed = c(
    "0", "0", "0", "yes",
    "0", "0", "0", "0", "yes", "0",
    "0", "0", "0", "0", "yes", "0"
  ),
  episodes = c(
    0L, 1L, 1L, 0L,
    0L, 1L, 1L, 1L, 0L, 0L,
    1L, 1L, 0L, 1L, 0L, 0L
  ),
  stringsAsFactors = FALSE
)

# Per id, total the episodes on the rows strictly before the first "yes" bleed.
# setDT() converts df1 to a data.table by reference.
setDT(df1)[, {
  # Position of the first bleed within this id; NA when the id never bleeds.
  first_bleed <- match("yes", bleed)
  # With no bleed every row counts; otherwise stop just before the bleed row,
  # so the bleed observation itself is never included in the sum.
  n_before <- if (is.na(first_bleed)) .N else first_bleed - 1L
  list(episodes = sum(episodes[seq_len(n_before)]))
}, by = id]
# id episodes
#1: J1 2
#2: J2 3
#3: J3 3