最大化R中的目标函数

时间:2017-03-29 14:45:59

标签: r optimization

我想在给定差异大小(自变量)的情况下找到最大值(响应)。

以下是一些数据:

 x <- "A B C 
 1  0.63 0.67 0.61
 2  0.62 0.64 0.60
 3  0.64 0.65 0.59
 4  0.70 0.70 0.63
 5  0.71 0.73 0.68
 6  0.70 0.75 0.69
 7  0.71 0.75 0.70
 8  0.74 0.76 0.71
 9  0.79 0.81 0.74
10 0.80 0.83 0.76
11 0.82 0.84 0.78
12 0.82 0.84 0.80
13 0.83 0.85 0.81
14 0.81 0.88 0.80
15 0.78 0.84 0.77
16 0.75 0.79 0.74
17 0.73 0.77 0.72
18 0.72 0.75 0.71
19 0.73 0.75 0.71
20 0.73 0.75 0.71
21 0.74 0.76 0.72
22 0.72 0.76 0.71
23 0.71 0.74 0.69
24 0.73 0.75 0.70
25 0.78 0.79 0.71
26 0.82 0.84 0.77
27 0.80 0.84 0.78
28 0.77 0.81 0.76
29 0.79 0.81 0.75
30 0.83 0.84 0.78
31 0.86 0.87 0.82
32 0.85 0.87 0.83
33 0.83 0.84 0.82
34 0.78 0.85 0.77
35 0.74 0.80 0.72
36 0.72 0.76 0.71
37 0.74 0.77 0.70
38 0.75 0.75 0.70
39 0.78 0.81 0.72
40 0.78 0.82 0.75" 

# Or generate it like this
x <- data.frame(
  A = c(0.63, 0.62, 0.64, 0.7, 0.71, 0.7, 0.71, 0.74, 0.79, 0.8, 0.82, 0.82, 0.83, 0.81, 0.78, 0.75, 0.73, 0.72, 0.73, 0.73, 0.74, 0.72, 0.71, 0.73, 0.78, 0.82, 0.8, 0.77, 0.79, 0.83, 0.86, 0.85, 0.83, 0.78, 0.74, 0.72, 0.74, 0.75, 0.78, 0.78),
  B = c(0.67, 0.64, 0.65, 0.7, 0.73, 0.75, 0.75, 0.76, 0.81, 0.83, 0.84, 0.84, 0.85, 0.88, 0.84, 0.79, 0.77, 0.75, 0.75, 0.75, 0.76, 0.76, 0.74, 0.75, 0.79, 0.84, 0.84, 0.81, 0.81, 0.84, 0.87, 0.87, 0.84, 0.85, 0.8, 0.76, 0.77, 0.75, 0.81, 0.82),
  C = c(0.61, 0.6, 0.59, 0.63, 0.68, 0.69, 0.7, 0.71, 0.74, 0.76, 0.78, 0.8, 0.81, 0.8, 0.77, 0.74, 0.72, 0.71, 0.71, 0.71, 0.72, 0.71, 0.69, 0.7, 0.71, 0.77, 0.78, 0.76, 0.75, 0.78, 0.82, 0.83, 0.82, 0.77, 0.72, 0.71, 0.7, 0.7, 0.72, 0.75))

这里有一些调整:

data <- read.table(text=x, header = TRUE)

data$diff_AC <- with(data, (A-C))
data$diff_AB <- with(data, (A-B))

with(data, plot(A~1, col=1))
with(data, points(B~1, col=2))
with(data, points(C~1, col=3))

计算回报:

data$retA <- with(data, as.numeric(c(0,diff(A))/lag(A,1)))

现在使用optim找出A与B和A与C的差异大小,给定A的所有数据,A(retA)的返回值最高。

This should be done separately for negative return and positive return

我试过这个,但我不确定如何将return A部分引入optim

max.rss <- function(data, par) { with(data, -sum((par[1] * (B - A) + (C - A))^2)) }
result <- optim(par = 0, max.rss, data = data, method = "Brent", lower = 0, upper = 1) 

修改

所以问题是在哪个级别(差异的大小)diff_ABdiff_AC应该是retA(系列A的回归)最高(最大化)diff_ABdiff_AC的最小差异大小retA最小(最高负率)。

with(data, plot(retA ~ diff_AB, ylim=c(-0.1,0.1), xlim=c(-.1,.1)))
with(data, points(retA ~ diff_AC, col="red3"))

EDIT2:

问题很可能没有明确定义,或者目前的解释并不一定有意义。

欢迎任何分析/建模潜在动态的建议!

编辑3:

这可能是基于现有答案的解决方案:

data$rank_min <- with(data, ave(retA, diff_AB, FUN=function(x) rank(x, ties.method="min")))
data$rank_max <- with(data, ave(retA, diff_AB, FUN=function(x) rank(x, ties.method="max")))

with(data, data[rank_min==min(rank_min), ])
with(data, data[rank_max==max(rank_max), ])

或(但我不确定这是否完全正确)

diff_binAB <- with(data, unique(diff_AB))
mse <- numeric(length(diff_binAB))

for(i in 1:length(diff_binAB)){
pwise <- with(data, lm(retA ~ diff_AB*(diff_AB < diff_binAB[i]) + diff_AB*(diff_AB >= diff_binAB[i])))
mse[i] <- summary(pwise)[6]
}

mse <- as.numeric(mse)
mse 

diff_binAB[which(mse==min(mse))]
# -0.07

1 个答案:

答案 0 :(得分:1)

如果您不想依赖单一观察(例如data[which.max(retA), ]),您可以使用10%分位数:

with(data, summary(diff_AB[retA < quantile(data$retA, 0.1)]))
with(data, summary(diff_AC[retA < quantile(data$retA, 0.1)]))

with(data, summary(diff_AB[retA > quantile(data$retA, 0.9)]))
with(data, summary(diff_AC[retA > quantile(data$retA, 0.9)]))

或估计依赖模型

m1 <- with(data, lm(retA~diff_AC+diff_AB))
summary(m1)

m2 <- with(data, lm(retA ~ I(diff_AC+diff_AB)))
summary(m2)

但您可能想要考虑时间序列残差的自相关:

plot(m2$residuals, type = "l")

linear model

例如,我们假设它是一个AR(1)过程:

library(astsa)

m3 <- with(data, sarima(retA, 1,0,0, xreg=cbind(diff_AB, diff_AC)))

linear model with AR(1) errors

然后根据此模型预测retA