我有一个指数平滑功能。我需要按群组应用此时间序列。在开始时我需要设置固定的初始值,然后每年该函数计算取决于前一年结果的结果(或者如果是第一年则计算初始值)。
我有很多数据,速度是主要关注点。那么如何使用dplyr
或tidyverse
?
以下代码有效,但只是基于initialValues
。
library(tidyverse)
library(expm)
# Function:
f <- function(L1, L2, L3, L4, L5, A) {
solve(A) %*% (expm(A) %*% (A %*% initialValues + c(L1, L2, L3, L4, L5)))
}
# Data:
df <- as_tibble(list(year = rep(2000:2002, 2),
id = rep(letters[1:2], 3),
L1 = sample(1:10, 6),
L2 = sample(1:10, 6),
L3 = sample(1:10, 6),
L4 = sample(1:10, 6),
L5 = sample(1:10, 6),
A = list(matrix(runif(25, 0, 1), ncol = 5),
matrix(runif(25, 0, 1), ncol = 5),
matrix(runif(25, 0, 1), ncol = 5),
matrix(runif(25, 0, 1), ncol = 5),
matrix(runif(25, 0, 1), ncol = 5),
matrix(runif(25, 0, 1), ncol = 5)
)))
initialValues <- c(5, 5, 6, 8, 9)
# Call:
final <- df %>%
group_by(id) %>%
mutate(result = pmap(list(L1, L2, L3, L4, L5, A), f))
上述函数f
适用于第一年,但是第二年它应该是这样的:
solve(A) %*% (expm(A) %*% (A %*% dplyr::lag(result) + c(L1, L2,
L3, L4, L5)))
OR:
solve(A) %*% (expm(A) %*% (A %*% result[i - 1] + c(L1, L2, L3, L4,
L5)))
但result
本身不能在pmap
内以这种方式引用。
编辑:使用辅助变量和函数中的条件case_when
,我可以按组id_nr
引用前一个值,但这个解决方案很笨拙。有更好的想法吗?
f1 <- function(id_nr, L1, L2, L3, L4, L5, A) {
case_when(id_nr == 1 ~ solve(A) %*% (expm(A) %*% (A %*% initialValues
+ c(L1, L2, L3, L4, L5))),
TRUE ~ NA_real_ )
}
f2 <- function(id_nr, L1, L2, L3, L4, L5, A, onebefore) {
case_when(id_nr == 2 ~ solve(A) %*% (expm(A) %*% (A %*% onebefore +
c(L1, L2, L3, L4, L5))),
TRUE ~ NA_real_ )
}
f3 <- function(id_nr, L1, L2, L3, L4, L5, A, onebefore) {
case_when(id_nr == 3 ~ solve(A) %*% (expm(A) %*% (A %*% onebefore +
c(L1, L2, L3, L4, L5))),
TRUE ~ NA_real_ )
}
final <- df %>%
group_by(id) %>%
mutate(id_nr = 1:n(),
result = pmap(list(id_nr, L1, L2, L3, L4, L5, A), f1),
result2 = pmap(list(id_nr, L1, L2, L3, L4, L5, A, result[1]), f2),
result3 = pmap(list(id_nr, L1, L2, L3, L4, L5, A, result2[2]), f3)
) %>%
select(year, id, id_nr, result, result2, result3) %>%
as.data.frame()
给出:
# year id id_nr result
# 1 2000 a 1 69.99273, 187.46908, 133.68695, 39.14645, 192.07844
# 2 2001 b 1 150.08891, 105.06450, 134.75766, 143.28060, 86.68116
# 3 2002 a 2 NA, NA, NA, NA, NA
# 4 2000 b 2 NA, NA, NA, NA, NA
# 5 2001 a 3 NA, NA, NA, NA, NA
# 6 2002 b 3 NA, NA, NA, NA, NA
# result2 result3
# 1 NA, NA, NA, NA, NA
#NA, NA, NA, NA, NA
# 2 NA, NA, NA, NA, NA
#NA, NA, NA, NA, NA
# 3 1630.093, 2488.520, 2012.516, 1407.798, 1377.609
#NA, NA, NA, NA, NA
# 4 1751.489, 1444.543, 1531.545, 1922.810, 1544.579
#NA, NA, NA, NA, NA
# 5 NA, NA, NA, NA, NA 30153.83,
#36416.09, 19069.84, 18595.81, 31028.20
# 6 NA, NA, NA, NA, NA 22072.69,
#22904.23, 20731.95, 14812.70, 18054.79
(我仍然需要合并result
,result2
,result3
列。)