Question

我一直试图在R中估计一个相当混乱的非线性回归模型已经有一段时间了。在使用nls函数无数次尝试失败后，我现在正试着optim，我过去曾多次使用过它。对于此示例，我将使用以下数据：

x1 <- runif(1000,0,7)
x2 <- runif(1000,0,7)
x3 <- runif(1000,0,7)

y <- log(.5 + .5*x1 + .7*x2 + .4*x3 + .05*x1^2 + .1*x2^2 + .15*x3^2 - .05*x1*x2 - .1*x1*x3 - .07*x2*x3 + .02*x1*x2*x2) + rnorm(1000)

我想估计上面log（）函数中多项式表达式中的参数，因此我定义了以下函数来复制非线性最小二乘回归：

g <- function(coefs){

    fitted <- coefs[1] + coefs[2]*x1 + coefs[3]*x2 + coefs[4]*x3 + coefs[5]*x1^2 + coefs[6]*x2^2 + coefs[7]*x3^2 + coefs[8]*x1*x2 + coefs[9]*x1*x3 + coefs[10]*x2*x3 + coefs[11]*x1*x2*x3       
    error <- y - log(fitted)
    return(sum(error^2))
}

为了避免log（）表达式中的负起始值，我首先估计下面的线性模型：

lm.1 <- lm(I(exp(y)) ~ x1 + x2 + x3 + I(x1^2) + I(x2^2) + I(x3^2) + I(x1*x2) + I(x1*x3) + I(x2*x3) + I(x1*x2*x3))
intercept.start <- ifelse((min(fitted(lm.1)-lm.1$coefficients[1])) <= 0, -(min(fitted(lm.1)-lm.1$coefficients[1])) + .5, .5)
coefs.start <- c(intercept.start,lm.1$coefficients[-1])

定义intercept.start以上保证log（）内部的表达式在开始时将严格为正。但是，当我运行optim命令

时

nl.model <- optim(coefs.start, g, method="L-BFGS-B")

我收到以下错误消息

Error in optim(coefs.start, g, method = "L-BFGS-B") : 
L-BFGS-B needs finite values of 'fn'
In addition: Warning message:
In log(fitted) : NaNs produced

有谁知道如何强制optim例程忽略在log（）表达式中产生负值的参数估计？提前谢谢。

Answer 1

这是我调查工作的日志。我把最大值放在拟合值上并得到收敛。然后我问自己，增加最大值是否会对估计的参数做任何事情并发现没有变化......并且与起始值没有区别，所以我认为你搞砸了构建函数。也许你可以进一步调查：

> gp <- function(coefs){
+ 
+     fitted <- coefs[1] + coefs[2]*x1 + coefs[3]*x2 + coefs[4]*x3 + coefs[5]*x1^2 + coefs[6]*x2^2 + coefs[7]*x3^2 + coefs[8]*x1*x2 + coefs[9]*x1*x3 + coefs[10]*x2*x3 + coefs[11]*x1*x2*x3   }
> describe( gp( coefs.start) )   #describe is from pkg:Hmisc
gp(coefs.start) 
      n missing  unique    Info    Mean     .05     .10     .25     .50     .75 
   1000       0    1000       1   13.99   2.953   4.692   8.417  12.475  18.478 
    .90     .95 
 25.476  28.183 

lowest :  0.5000  0.5228  0.5684  0.9235  1.1487
highest: 41.0125 42.6003 43.1457 43.5950 47.2234 
> g <- function(coefs){
+ 
+     fitted <- max( coefs[1] + coefs[2]*x1 + coefs[3]*x2 + coefs[4]*x3 + coefs[5]*x1^2 + coefs[6]*x2^2 + coefs[7]*x3^2 + coefs[8]*x1*x2 + coefs[9]*x1*x3 + coefs[10]*x2*x3 + coefs[11]*x1*x2*x3     , 1000)
+     error <- y - log(fitted)
+     return(sum(error^2))
+ }
> nl.model <- optim(coefs.start, g, method="L-BFGS-B")
> nl.model
$par
                             x1              x2              x3         I(x1^2) 
     0.77811231     -0.94586233     -1.33540959      1.65454871      0.31537594 
        I(x2^2)         I(x3^2)      I(x1 * x2)      I(x1 * x3)      I(x2 * x3) 
     0.45717138      0.11051418      0.59197115     -0.25800792      0.04931727 
I(x1 * x2 * x3) 
    -0.08124126 

$value
[1] 24178.62

$counts
function gradient 
       1        1 

$convergence
[1] 0

$message
[1] "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL"

> g <- function(coefs){
+ 
+     fitted <- max( coefs[1] + coefs[2]*x1 + coefs[3]*x2 + coefs[4]*x3 + coefs[5]*x1^2 + coefs[6]*x2^2 + coefs[7]*x3^2 + coefs[8]*x1*x2 + coefs[9]*x1*x3 + coefs[10]*x2*x3 + coefs[11]*x1*x2*x3     , 100000)
+     error <- y - log(fitted)
+     return(sum(error^2))
+ }
> nl.model <- optim(coefs.start, g, method="L-BFGS-B")
> nl.model
$par
                             x1              x2              x3         I(x1^2) 
     0.77811231     -0.94586233     -1.33540959      1.65454871      0.31537594 
        I(x2^2)         I(x3^2)      I(x1 * x2)      I(x1 * x3)      I(x2 * x3) 
     0.45717138      0.11051418      0.59197115     -0.25800792      0.04931727 
I(x1 * x2 * x3) 
    -0.08124126 

$value
[1] 89493.99

$counts
function gradient 
       1        1 

$convergence
[1] 0

$message
[1] "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL"

Answer 2

这是一种略有不同的方法。

除了评论中提到的拼写错误，如果问题是log(...)的参数是＆lt; 0对于某些参数估计，您可以更改函数定义以防止这种情况。

# just some setup - we'll need this later
set.seed(1)
err <- rnorm(1000, sd=0.1)    # note smaller error sd
x1  <- runif(1000,0,7)
x2  <- runif(1000,0,7)
x3  <- runif(1000,0,7)
par <- c(0.5, 0.5, 0.7, 0.4, 0.05, 0.1, 0.15, -0.05, -0.1, -0.07, 0.02)
m   <- cbind(1, x1, x2, x3, x1^2, x2^2, x3^2, x1*x2, x1*x3, x2*x3, x1*x2*x3)
y  <- as.numeric(log(m %*% par)) + err
# note slight change in the model function definition
g <- function(coefs){
  fitted <- coefs[1] + coefs[2]*x1 + coefs[3]*x2 + coefs[4]*x3 + coefs[5]*x1^2 + coefs[6]*x2^2 + coefs[7]*x3^2 + coefs[8]*x1*x2 + coefs[9]*x1*x3 + coefs[10]*x2*x3 + coefs[11]*x1*x2*x3
  fitted <- ifelse(fitted<=0, 1, fitted)   # ensures fitted > 0
  error  <- y - log(fitted)
  return(sum(error^2))
}
lm.1 <- lm(I(exp(y)) ~ x1 + x2 + x3 + I(x1^2) + I(x2^2) + I(x3^2) + I(x1*x2) + I(x1*x3) + I(x2*x3) + I(x1*x2*x3))
nl.model <- optim(coef(lm.1), g, method="L-BFGS-B", control=list(maxit=1000))
nl.model$par
#     (Intercept)              x1              x2              x3         I(x1^2)         I(x2^2)         I(x3^2)      I(x1 * x2)      I(x1 * x3)      I(x2 * x3) I(x1 * x2 * x3) 
#      0.40453182      0.50136222      0.71696293      0.45335893      0.05461253      0.10210854      0.14913914     -0.06169715     -0.11195476     -0.08497180      0.02531717 
with(nl.model, cat(convergence, message))
# 0 CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH

请注意，这些估算非常接近实际值。那是因为在设置中我使用了一个较小的错误项（sd = 0.2而不是1）。在您的示例中，与响应（y）相比，错误很大，因此您基本上适合随机错误。

如果您使用实际参数值作为起始估计值来拟合模型，则会得到几乎相同的结果，而不会接近“真实”值。

nl.model <- optim(par, g,  method="L-BFGS-B", control=list(maxit=1000))
nl.model$par
#  [1]  0.40222956  0.50159930  0.71734810  0.45459606  0.05465654  0.10206887  0.14899640 -0.06177640 -0.11209065 -0.08497423  0.02533085
with(nl.model, cat(convergence, message))
# 0 CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH

尝试使用原始错误（sd = 1）并查看会发生什么。

由于NAs导致R失败

2 个答案: