我有一个简单的数据集,其中包含date
,sample
和application
。每天sample
中的值应减少1
,然后增加所应用的值。
当我们使用另一个sample
时,我们将从新值开始。
结果在变量estimate
中给出。我正在尝试计算estimate
中的变量R
。
这是我的数据集:
structure(
list(
date = structure(c(17591, 17592, 17593, 17594,
17595, 17596, 17597, 17598, 17599, 17600, 17601, 17602, 17603,
17604, 17605, 17606, 17607, 17608, 17609, 17610, 17611, 17612,
17613, 17614), class = "Date"),
sample = c(30, NA, NA, NA, NA, 20, NA, NA, 40, NA, NA,
NA, NA, NA, 35, NA, NA, 15, NA, NA, NA,
NA, 5, NA),
applied = c(NA, NA, 10, NA, NA, NA, NA, 10, NA, NA,
15, NA, NA, NA, 10, NA, NA, NA, 15, NA, NA, 10, NA, NA),
estimate = c(30, 29, 38, 37, 36, 20, 19, 28, 40, 39, 53, 52,
51, 50, 35, 34, 33, 15, 29, 28, 27, 36, 5, 4)
),
class = "data.frame", row.names = c(NA, -24L)
)
答案 0 :(得分:2)
这可能需要清理一点,但它可以工作:
library(dplyr)
df %>%
mutate(group = cumsum(!is.na(sample))) %>%
group_by(group) %>%
mutate(
to_add = ifelse(!is.na(sample), 0, ifelse(is.na(applied), -1, applied - 1)),
result = first(sample) + cumsum(to_add)
)
# # A tibble: 24 x 7
# # Groups: group [6]
# date sample applied estimate group to_add result
# <date> <dbl> <dbl> <dbl> <int> <dbl> <dbl>
# 1 2018-03-01 30 NA 30 1 0 30
# 2 2018-03-02 NA NA 29 1 -1 29
# 3 2018-03-03 NA 10 38 1 9 38
# 4 2018-03-04 NA NA 37 1 -1 37
# 5 2018-03-05 NA NA 36 1 -1 36
# 6 2018-03-06 20 NA 20 2 0 20
# 7 2018-03-07 NA NA 19 2 -1 19
# 8 2018-03-08 NA 10 28 2 9 28
# 9 2018-03-09 40 NA 40 3 0 40
# 10 2018-03-10 NA NA 39 3 -1 39
# # ... with 14 more rows
答案 1 :(得分:1)
ave(df1$sample,
cumsum(!is.na(df1$sample)),
FUN = function(x) max(x, na.rm = TRUE) + -1 * (seq_along(x) - 1)) +
ave(replace(df1$applied, is.na(df1$applied), 0),
cumsum(!is.na(df1$sample)),
FUN = cumsum)
# [1] 30 29 38 37 36 20 19 28 40 39 53 52 51 50 45 44 43 15 29 28 27 36 5 4
答案 2 :(得分:0)
df <- structure(list(date = structure(c(17591, 17592, 17593, 17594,
17595, 17596, 17597, 17598, 17599, 17600, 17601, 17602, 17603,
17604, 17605, 17606, 17607, 17608, 17609, 17610, 17611, 17612,
17613, 17614), class = "Date"),
sample = c(30, NA, NA, NA, NA,
20, NA, NA, 40, NA, NA, NA, NA, NA, 35, NA, NA, 15, NA, NA, NA,
NA, 5, NA),
applied = c(NA, NA, 10, NA, NA, NA, NA, 10, NA, NA,
15, NA, NA, NA, 10, NA, NA, NA, 15, NA, NA, 10, NA, NA),
estimate = c(30,29, 38, 37, 36, 20, 19, 28, 40, 39, 53, 52, 51, 50, 35, 34, 33,
15, 29, 28, 27, 36, 5, 4)), class = "data.frame",
row.names = c(NA,-24L))
require(dplyr)
## create asubset without "estimate"
df2 <- df[,1:3]
df2 %>% arrange(date) %>%
mutate(sample_id = cumsum(!is.na(sample))) %>% #create an id for each sample
mutate(applied = if_else(is.na(applied),0, applied)) %>% # turn NA applied to 0
mutate(applied = if_else(is.na(sample), applied -1, applied)) %>% # turn applied to -1 when sample is NA
mutate(estimate0 = if_else(is.na(sample), 0, sample)) %>% # duplicate sample column
mutate(estimate0 = estimate0 + applied) %>% # horizontal calculation sample + applied
group_by(sample_id) %>% # group by to restrict the cumsum
arrange(date) %>% # necessary order for the cumsum
mutate(estimate = cumsum(estimate0)) %>% # cumsum
ungroup %>%
select(date, sample, applied, estimate) %>%
print(n = 24)
# A tibble: 24 x 4
date sample applied estimate
<date> <dbl> <dbl> <dbl>
1 2018-03-01 30 0 30
2 2018-03-02 NA -1 29
3 2018-03-03 NA 9 38
4 2018-03-04 NA -1 37
5 2018-03-05 NA -1 36
6 2018-03-06 20 0 20
7 2018-03-07 NA -1 19
8 2018-03-08 NA 9 28
9 2018-03-09 40 0 40
10 2018-03-10 NA -1 39
11 2018-03-11 NA 14 53
12 2018-03-12 NA -1 52
13 2018-03-13 NA -1 51
14 2018-03-14 NA -1 50
15 2018-03-15 35 10 45
16 2018-03-16 NA -1 44
17 2018-03-17 NA -1 43
18 2018-03-18 15 0 15
19 2018-03-19 NA 14 29
20 2018-03-20 NA -1 28
21 2018-03-21 NA -1 27
22 2018-03-22 NA 9 36
23 2018-03-23 5 0 5
24 2018-03-24 NA -1 4