我想通过一些运算来更新某一列(sales)中部分行的值:
我想计算1992年份葡萄酒在1992年的销售额(24.135 - 16.3894 = 7.7456),并用该值计算1993年份葡萄酒在1993年的销售额(34.871 - 24.1265 = 10.7445),依此类推。由于原始数据集非常庞大,下面的循环运行起来极其缓慢,几乎无法跑完。是否有其他方法(例如定义一个函数)可以不使用循环就做到这一点?
# Fill in the missing `sales` values one vintage year at a time.
#
# Each pass recomputes the helper columns from the current `sales`, then fills
# the row where `year` and `vintage` both equal `yr` (if its `sales` is still
# NA) with `stock - lag(cumsum_remained)`. Passes run in ascending year order
# because each pass reads the `sales` written by the previous one.
#
# The original nested `i`/`j` loops could only match a row when `i == j`
# (the condition also required `year == vintage`), so a single loop over the
# years gives the same result with far fewer passes over the data.
for (yr in 1992:1993) {
  vy <- vy %>%
    # Rows with vintage < year get retired = beta * sales; others keep theirs.
    mutate(retired = ifelse(vintage < year, beta * sales, retired)) %>%
    group_by(vintage) %>%
    mutate(
      cumsum_retired = cumsum(retired),
      remained = sales - cumsum_retired
    ) %>%
    # group_by() replaces the previous grouping, so no ungroup() is needed.
    group_by(year) %>%
    mutate(
      cumsum_remained = cumsum(remained),
      sales = ifelse(
        year == yr & vintage == yr & is.na(sales),
        stock - lag(cumsum_remained),
        sales
      ),
      # Negative sales are clamped to zero.
      sales = ifelse(sales < 0, 0, sales)
    ) %>%
    # Carry each vintage's last known sales down to its later rows.
    group_by(vintage) %>%
    fill(sales)
}
数据:
# Example input: one row per (vintage, year) pair, four years for each of the
# four vintages. Columns that simply repeat a value per vintage are written
# with rep(); the irregular ones are spelled out in full.
vy <- structure(
  list(
    year = rep(c(1990, 1991, 1992, 1993), times = 4),
    vintage = rep(c(1990, 1991, 1992, 1993), each = 4),
    beta = c(
      0, 0, 2e-04, 6e-04,
      0, 0, 0, 2e-04,
      0, 0, 0, 0,
      0, 0, 0, 0
    ),
    stock = rep(c(12.996, 16.392, 24.135, 34.871), each = 4),
    # sales is still unknown (NA) for the 1992 and 1993 vintages.
    sales = rep(c(12.996, 3.396, NA, NA), each = 4),
    retired = c(
      0, 0, 0.003, 0.008,
      0, 0, 0, 0.001,
      0, 0, 0, NA,
      0, 0, 0, 0
    ),
    cumsum_retired = c(
      0, 0, 0.003, 0.011,
      0, 0, 0, 0.001,
      0, 0, 0, NA,
      0, 0, 0, 0
    ),
    remained = rep(c(13, 3, NA, NA), each = 4),
    cumsum_remained = rep(c(13, 16, NA, NA), each = 4),
    `3` = rep(3, times = 16)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -16L)
)
预期结果:
# Expected result: the same 16 rows with `sales` filled in for the 1992 and
# 1993 vintages and the helper columns recomputed from it.
#
# NOTE(review): the "grouped_df" class and the vars/indices/labels attributes
# are reproduced exactly as given. They look like grouping metadata written by
# an older dplyr and name a column `v` that is not in this table - confirm
# before using this object with a current dplyr.
vy <- structure(
  list(
    year = rep(c(1990, 1991, 1992, 1993), times = 4),
    vintage = rep(c(1990, 1991, 1992, 1993), each = 4),
    beta = c(
      0, 0, 2e-04, 6e-04,
      0, 0, 0, 2e-04,
      0, 0, 0, 0,
      0, 0, 0, 0
    ),
    stock = rep(c(12.996, 16.392, 24.135, 34.871), each = 4),
    sales = c(
      12.996, 12.996, 12.996, 12.996,
      3.396, 3.396, 3.396, 3.396,
      NA, NA, 7.7455992, 7.7455992,
      NA, NA, NA, 10.7444768
    ),
    retired = c(
      0, 0, 0.0025992, 0.0077976,
      0, 0, 0, 0.0006792,
      0, 0, 0, 0,
      0, 0, 0, 0
    ),
    cumsum_retired = c(
      0, 0, 0.0025992, 0.0103968,
      0, 0, 0, 0.0006792,
      0, 0, 0, 0,
      0, 0, 0, 0
    ),
    remained = c(
      12.996, 12.996, 12.9934008, 12.9856032,
      3.396, 3.396, 3.396, 3.3953208,
      NA, NA, 7.7455992, 7.7455992,
      NA, NA, NA, NA
    ),
    cumsum_remained = c(
      12.996, 12.996, 12.9934008, 12.9856032,
      16.392, 16.392, 16.3894008, 16.380924,
      NA, NA, 24.135, 24.1265232,
      NA, NA, NA, NA
    ),
    `3` = rep(3, times = 16)
  ),
  row.names = c(NA, -16L),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  vars = "v",
  drop = TRUE,
  indices = list(0:3, 4:7, 8:11, 12:15),
  group_sizes = rep(4L, times = 4),
  biggest_group_size = 4L,
  labels = structure(
    list(v = c(1990, 1991, 1992, 1993)),
    row.names = c(NA, -4L),
    class = "data.frame",
    vars = "v",
    drop = TRUE
  )
)