我有以下问题。
第一步,假设我有一个包含3个事实(a,b,c)的数据框。
library(dplyr)
library(tidyr)
library(CVXR)
library(RcppRoll)
library(purrr)
set.seed(1234)
df = tibble(
a = c(4.88,5.03,5.11,4.77,5.04,5.05,4.94,4.95,4.94,4.91)
,b = c(652,600,622,706,796,689,649,609,616,942)
,c = c(101,95,96,105,93,86,106,90,100,91)
)
然后,我对b列和c列进行一些转换(例如,它是滚动总和,但在这里我想做更复杂的事情),并计算目标列(y)。
df = df %>%
mutate(b_roll_sum = roll_sum(b, n=3, fill=NA, align="right", na.rm = TRUE),
c_roll_sum = roll_sum(c, n=3, fill=NA, align="right", na.rm = TRUE)) %>%
mutate(y = (-1)*a+0.0002*b_roll_sum+0.0007*c_roll_sum+1)
# A tibble: 10 x 6
a b c b_roll_sum c_roll_sum y
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4.88 652 101 NA NA NA
2 5.03 600 95 NA NA NA
3 5.11 622 96 1874 292 -3.53
4 4.77 706 105 1928 296 -3.18
5 5.04 796 93 2124 294 -3.41
6 5.05 689 86 2191 284 -3.41
7 4.94 649 106 2134 285 -3.31
8 4.95 609 90 1947 282 -3.36
9 4.94 616 100 1874 296 -3.36
10 4.91 942 91 2167 281 -3.28
现在的目标是在b和c列之间重新定位数字:
最大化y 。
我试图为此使用CVXR包,在这里我将目标定义为数据帧和对象Variable()的自定义函数。该代码似乎可以运行,但是结果是错误的,因为解决方案应该是将所有内容“重定位”到列c。输出却是另一种方式。
# calculate distribution in rows to keep them like before
dist_by_rows <- df %>% map2_dfr(.x = df %>% select(b, c)
,.y = df %>% select(b, c) %>% summarise_all(sum)
,.f = ~(.x/.y))
names(dist_by_rows) <- paste0(names(dist_by_rows), "_rows_dist")
df <- bind_cols(df, dist_by_rows)
# A tibble: 10 x 8
a b c b_roll_sum c_roll_sum y b_rows_dist c_rows_dist
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4.88 652 101 NA NA NA 0.116 0.132
2 5.03 600 95 NA NA NA 0.107 0.124
3 5.11 622 96 1874 292 -3.53 0.110 0.125
4 4.77 706 105 1928 296 -3.18 0.125 0.137
5 5.04 796 93 2124 294 -3.41 0.141 0.121
6 5.05 689 86 2191 284 -3.41 0.122 0.112
7 4.94 649 106 2134 285 -3.31 0.115 0.138
8 4.95 609 90 1947 282 -3.36 0.108 0.117
9 4.94 616 100 1874 296 -3.36 0.109 0.130
10 4.91 942 91 2167 281 -3.28 0.167 0.119
# define function to optimize
funk <- function(df, vars_to_opt) {
df_new <- df %>%
mutate(
new_b = value(vars_to_opt)[1],
new_c = value(vars_to_opt)[2],
b = new_b*b_rows_dist,
c = new_c*c_rows_dist) %>%
mutate(b_roll_sum = roll_sum(b, n=3, fill=NA, align="right", na.rm = TRUE),
c_roll_sum = roll_sum(c, n=3, fill=NA, align="right", na.rm = TRUE)) %>%
mutate(y = (-1)*a+0.0002*b_roll_sum+0.0007*c_roll_sum+1)
df_new %>%
select(y) %>%
sum(., na.rm = T)
}
# test of function on "current status"
test <- Variable(2)
value(test) <- matrix(c(6881, 963), nrow = 2) #currently sum of b and c is 6881 and 963, respectively
> funk(df, vars_to_opt = test)
[1] -26.8452
> df %>% select(y) %>% sum(na.rm = T)
[1] -26.8452
# CVXR with constraints
mix_hat <- Variable(2)
objective <- Maximize(funk(df, vars_to_opt = mix_hat))
A <- matrix(rep(1, 2), nrow = 1)
B <- diag(1, nrow = 2)
constraint1 <- A %*% mix_hat == 7844 #sum of b and c keep like it was 7844
constraint2 <- B %*% mix_hat >= 0 #b & c non negative
problem <- Problem(objective, constraints = list(constraint1, constraint2))
result <- solve(problem, b = "GLPK")
> result$getValue(mix_hat)
[,1]
[1,] 7844
[2,] 0
> result$value
[1] -31.71