我的数据如下:
# Example data: four ids with 6, 4, 9 and 3 rows respectively.
id <- rep(c(1, 2, 3, 4), times = c(6, 4, 9, 3))
# `start` is NA everywhere except rows 4, 9, 13 and 19, where it equals 1.
start <- rep(NA_real_, length(id))
start[c(4, 9, 13, 19)] <- 1
e <- data.frame(id = id, start = start)
我想用类似累积和的方式填充 NA:每当出现 start == 1 或遇到新的 id 时,计数重新开始。我写了一个 for 循环,但我的实际数据太长,for 循环要好几天才能跑完。有没有办法加快我的解决方案?我的目标变量可以按如下方式复现:
# Reference (slow) implementation of the target column.
# Within each id, target is NA until the first row where start == 1, is 0 on
# every row where start == 1, and otherwise counts up by one from the previous
# row. The first row of every id is always NA.
e$target <- rep(NA_real_, nrow(e))
# seq_len(...)[-1] is empty for 0- or 1-row data, unlike 2:length(...), which
# would count backwards.
for (i in seq_len(nrow(e))[-1]) {
  if (e$id[i] != e$id[i - 1]) {
    # A new id starts: no start has been seen yet.
    e$target[i] <- NA
  } else if (isTRUE(e$start[i] == 1)) {
    # Reset only when start is exactly 1; isTRUE() treats NA as "no reset".
    e$target[i] <- 0
  } else {
    # NA + 1 stays NA, so rows before the first start remain NA.
    e$target[i] <- e$target[i - 1] + 1
  }
}
答案 0 :(得分:2)
我们可以使用data.table
library(data.table)
# Convert e to a data.table by reference.
setDT(e)
# Start a new run at every non-NA start (and at every id) and number the rows
# of each run from 0.
e[, target1 := seq_len(.N) - 1, by = .(grp = cumsum(!is.na(start)), id)]
# Rows of an id that come before its first non-NA start (all rows when the id
# has no start at all) have no run to belong to, so blank them out.
rows_before_first_start <- e[, .I[cumsum(!is.na(start)) == 0], by = id]$V1
e[rows_before_first_start, target1 := NA]
e
# id start target target1
# 1: 1 NA NA NA
# 2: 1 NA NA NA
# 3: 1 NA NA NA
# 4: 1 1 0 0
# 5: 1 NA 1 1
# 6: 1 NA 2 2
# 7: 2 NA NA NA
# 8: 2 NA NA NA
# 9: 2 1 0 0
#10: 2 NA 1 1
#11: 3 NA NA NA
#12: 3 NA NA NA
#13: 3 1 0 0
#14: 3 NA 1 1
#15: 3 NA 2 2
#16: 3 NA 3 3
#17: 3 NA 4 4
#18: 3 NA 5 5
#19: 3 1 0 0
#20: 4 NA NA NA
#21: 4 NA NA NA
#22: 4 NA NA NA
答案 1 :(得分:2)
您可以尝试 tidyverse。使用 fill 向下填充最近的非 NA 条目,然后用与其长度相同的序列替换这些值(减 1 是为了让序列从 0 开始)。
library(tidyverse)
# `run` counts how many start == 1 rows have been seen so far within the id,
# so rows before the first start have run == 0.
# Numbering rows within (id, run) restarts the counter at every start == 1;
# numbering once per id would let rows after a second start keep the old count.
# The result is ungrouped and the helper column is dropped.
e %>%
  group_by(id) %>%
  mutate(run = cumsum(!is.na(start) & start == 1)) %>%
  group_by(id, run) %>%
  mutate(target = if_else(run == 0, NA_real_, row_number() - 1)) %>%
  ungroup() %>%
  select(-run)
答案 2 :(得分:2)
另一个 data.table 方案是:
library(data.table)
# Convert e to a data.table by reference.
setDT(e)
# Within each id, subgroup counts the start == 1 rows seen so far; rows before
# the first start get subgroup 0.
e[, subgroup := cumsum(!is.na(start) & start == 1), by = id]
# Count the NA-start rows inside each (id, subgroup) run: the start row itself
# contributes 0 and each following NA row adds 1.
e[, target2 := cumsum(is.na(start)), by = .(id, subgroup)]
# Rows that precede any start have no counter.
e[subgroup == 0, target2 := NA_integer_]
# id start target subgroup target2
#1: 1 NA NA 0 NA
#2: 1 NA NA 0 NA
#3: 1 NA NA 0 NA
#4: 1 1 0 1 0
#5: 1 NA 1 1 1
#6: 1 NA 2 1 2
#7: 2 NA NA 0 NA
#8: 2 NA NA 0 NA
#9: 2 1 0 1 0
#10: 2 NA 1 1 1
#11: 3 NA NA 0 NA
#12: 3 NA NA 0 NA
#13: 3 1 0 1 0
#14: 3 NA 1 1 1
#15: 3 NA 2 1 2
#16: 3 NA 3 1 3
#17: 3 NA 4 1 4
#18: 3 NA 5 1 5
#19: 3 1 0 2 0
#20: 4 NA NA 0 NA
#21: 4 NA NA 0 NA
#22: 4 NA NA 0 NA