包Optim.jl非常慢

时间:2018-06-08 01:12:12

标签: r julia

我已经构建了两个语言几乎完全相同的语言 R Julia

事先我知道有一些方法可以改善Julia代码的性能,因为我没有声明类型,而且通常非向量化代码往往更有效。但是,Julia代码和 R 代码都在相同的地形上进行比较。

注意:我开始学习朱莉娅一周,所以你可以说我没有用朱莉娅编程得很好。

我注意到Optim包提供了optimize()功能。与默认安装的统计数据包的 R 语言的optim()功能相比,这是非常慢的 R 语言。

以下是代码 R Julia

R代码:

rm(list=ls(all=TRUE))

Gexp <- function(par,x){
  lambda <- par[1]
  pexp(q = x, rate = lambda, lower.tail = TRUE, log.p = FALSE)
}

gexp <- function(par,x){
  lambda <- par[1]
  dexp(x = x, rate = lambda, log = FALSE)
}

QGexp <- function(p,...){
  qexp(p,...)
}

Gweibull <- function(par,x){
  alpha <- par[1]
  beta <- par[2]
  pweibull(q = x, shape = alpha, scale = beta, lower.tail = TRUE, log.p = FALSE)
}

QGweibull <- function(p,...){
  qweibull(p,...)
}

# Função de distribuição acumulada Exponentiated Kw-G class (EKw-G)
cdf_ekwg <- function(cdf,par,x,...){
  a <- par[1]
  b <- par[2]
  c <- par[3]
  npar <- c(...)

  (1 - (1 - cdf(par = npar, x = x)^a)^b)^c

}

# cdf_ekwg(cdf = G, par = c(0.2,0.4,0.21), x = 1, alpha = 1.1, beta = 1.2, lambda = 1)

# Função densidade de probabilidade Exponentiated Kw-G class (EKw-G)
pdf_ekwg <- function(cdf, pdf, par, x, ...){

  a <- par[1]
  b <- par[2]
  c <- par[3]
  #cdf_ekwg_locale <- function(x){
  #  cdf_ekwg(cdf = cdf, par = par, x, ...)
  #}

  npar <- c(...)

  g = pdf(par = npar, x = x)
  G = cdf(par = npar, x = x)

  a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)

  #numDeriv::grad(func = cdf_ekwg_locale, x = x, method = "simple")
}

#integrate(f = pdf_ekwg, par = c(1,1,1.5), lower = 0, upper = Inf, cdf = Gexp, pdf = gexp,
#           lambda = 1.5)

# Será fixado os parâmetros de G. Serão estimados os parâmetros a, b e c do
# modelo EKwG.

sample_ekwg <- function(QG, n, par, ...){

  a <- par[1]
  b <- par[2]
  c <- par[3]

  u <- runif(n = n, min = 0, max = 1)
  p <- (1 - (1 - u^(1/c))^(1/b))^(1/a)

  QG(p = p, ...)

}

# Função de distribuição acumulada Exponentiated Kw-G class (EKw-G)
cdf_ekwg <- function(cdf,par,x,...){
  a <- par[1]
  b <- par[2]
  c <- par[3]
  npar <- c(...)

  (1 - (1 - cdf(par = npar, x = x)^a)^b)^c

}

# cdf_ekwg(cdf = G, par = c(0.2,0.4,0.21), x = 1, alpha = 1.1, beta = 1.2, lambda = 1)

# Função densidade de probabilidade Exponentiated Kw-G class (EKw-G)
pdf_ekwg <- function(cdf, pdf, par, x, ...){

  a <- par[1]
  b <- par[2]
  c <- par[3]
  #cdf_ekwg_locale <- function(x){
  #  cdf_ekwg(cdf = cdf, par = par, x, ...)
  #}

  npar <- c(...)

  g = pdf(par = npar, x = x)
  G = cdf(par = npar, x = x)

  a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)

  #numDeriv::grad(func = cdf_ekwg_locale, x = x, method = "simple")
}

# integrate(f = pdf_ekwg, par = c(1.4,1.3,0.5), lower = 0, upper = Inf, cdf = G,  alpha = 1.1, beta = 1.2)

loglikelihood <- function(cdf, pdf, par, x, ...){
  -sum(log(pdf_ekwg(cdf = cdf, pdf = pdf, par = par, x = x, ...)))
}

myoptim <- function(...) tryCatch(optim(...), error = function(e) NA)

G = Gexp
g = gexp
data = sample_ekwg(QG = QGexp, n = 550, par = c(1,1,1), rate = 1.5)
starts = c(1,1,1)

set.seed(0)
start = Sys.time()
for(i in 1:5){
  result <- myoptim(par = starts, fn = loglikelihood, x = data, cdf = G,
                    pdf = g, method = "Nelder-Mead",rate = 1.5)
}
Sys.time() - start

Julia代码

using Distributions
#using Cubature # Calculo de integrais numéricas.
#using Plots
using Optim
#using JuMP
#using NLopt

function gexp(x,par)
    λ = par[1]
    λ * exp(-λ * x)
end

# valor = hquadrature(x -> gexp(x,1), 0, 100)[1]

function Gexp(x,par)
    λ = par[1]
    1- exp(-λ * x)
end

function QGexp(x,par)
    λ = par[1]
    # A função Exponential no pacote Distributions é reparametrizada
    # como 1/lambda. Dessa forma, para trabalhar com densidade na forma
    # λ * exp(-λ*x) é preciso tomar 1/λ.
    quantile.(Exponential(1/λ),x)
end

function sample_ekwg(QG, n, par0, par1...)
    a = par0[1]
    b = par0[2]
    c = par0[3]

    u = rand(n)

    p = (1 - (1 - u.^(1/c)).^(1/b)).^(1/a)

    QG(p, par1...)
end

# Função de distribuição acumulada Exponentiated Kw-G class (EKw-G)
function cdf_ekwg(cdf, x, par0, par1...)
    a = par0[1]
    b = par0[2]
    c = par0[3]

    (1 - (1 - cdf.(x,par1...).^a).^b).^c
end

# Função densidade de probabilidade Exponentiated Kw-G class (EKw-G)
function pdf_ekwg(cdf, pdf, x, par0, par1...)
    a = par0[1]
    b = par0[2]
    c = par0[3]

    g = pdf(x, par1...)
    G = cdf(x, par1...)

    a * b * c * g * G.^(a-1) * (1-G.^a).^(b-1) * (1 - (1-G.^a).^b).^(c-1)

end

# valor = hquadrature(x ->
# pdf_ekwg(Gexp,gexp, x, [1,1,1], 1), 0, 100)[1]

function loglike(cdf, pdf, x, par0, par1...)
  n = length(x)
  soma = 0
  for i = 1:n
      soma += log(pdf_ekwg(cdf, pdf, x[i], par0, par1...))
  end
  return -soma # Queremos minimizar loglike.
end

 G = Gexp
g = gexp
data = sample_ekwg(QGexp, 550, [1,1,1],1.5)
starts = [1,1,1]
par0 = [1,1,1]
par1 = [1.5]

srand(0) # set seed.

@time for i = 1:5
     optimize(par0 -> loglike(G, g, data, par0, par1), [1.3,1.2,2.1])
end

执行这两个代码的机器配置如下:

enter image description here

注意:代码不能使用相同的示例。但是我相信这不会证明计算时间的巨大差异。

在我的硬件中, R 代码成本 0.6508956 秒, Julia 代码成本 27.180257 秒。

有谁知道如何让代码Julia比R代码运行得更快? 我想要一个简单的解决方案,因为它承诺在Julia中具有很好的计算性能,而不需要太多的编程理解。看,在R中没有做太多的事情来证明代码Julia中的主要修正。

最好的问候。

3 个答案:

答案 0 :(得分:3)

在上述讨论的总结中,这里有几个问题:

  1. 如前所述,splatting很慢
  2. optimize的调用未包含在也减慢计算的函数中
  3. 由于您已将loglike定义为整数,因此
  4. soma不是类型稳定的
  5. 两个优化例程都可以调用loglike不同的次数(由于它们的配置不同) - 所以最好将给定数量的调用基准测试到loglike - 我选择了1000以下
  6. 下面我发布一个清理过的Julia代码和R代码,它们应该做同样的工作,而Julia的速度要快2倍。在Julia中预编译之后的时间是:

    julia> experiment(Gexp, gexp, data, par0, par1)
      0.112414 seconds (2.75 M allocations: 41.992 MiB, 3.38% gc time)
    

    和R是

    > start = Sys.time()
    > for(i in 1:1000){
    +   loglikelihood(G, g, starts, data, 1.5)
    + }
    > Sys.time() - start
    Time difference of 0.2812479 secs
    

    以下是清理后的代码(我希望在删除不必要的部分时没有错误:) - 所以请检查我是否在某处搞砸了。)

    朱莉娅

    using Distributions
    
    gexp(x,λ) = λ * exp(-λ * x)
    Gexp(x,λ) = 1.0 - exp(-λ * x)
    QGexp(x,λ) = quantile.(Exponential(1/λ), x)
    
    function sample_ekwg(QG, n, par0, par1)
        a = par0[1]
        b = par0[2]
        c = par0[3]
        u = rand(n)
        p = (1 - (1 - u.^(1/c)).^(1/b)).^(1/a)
        QG(p, par1)
    end
    
    function pdf_ekwg(cdf, pdf, x, par0, par1)
        a = par0[1]
        b = par0[2]
        c = par0[3]
        g = pdf(x, par1)
        G = cdf(x, par1)
        a*b*c*g*G^(a-1)*(1-G^a)^(b-1)*(1-(1-G^a)^b)^(c-1)
    end
    
    function loglike(cdf, pdf, x, par0, par1)
      soma = 0.0
      for v in x
          soma += log(pdf_ekwg(cdf, pdf, v, par0, par1))
      end
      return -soma
    end
    
    par0 = [1.0,1.0,1.0]
    par1 = 1.5
    data = sample_ekwg(QGexp, 550, par0,par1)
    
    function experiment(G, g, data, par0, par1)
      @time for i = 1:1000
           loglike(G, g, data, par0, par1)
      end
    end
    
    experiment(Gexp, gexp, data, par0, par1)
    

    - [R

    Gexp <- function(par,x){
      lambda <- par[1]
      pexp(q = x, rate = lambda, lower.tail = TRUE, log.p = FALSE)
    }
    
    gexp <- function(par,x){
      lambda <- par[1]
      dexp(x = x, rate = lambda, log = FALSE)
    }
    
    QGexp <- function(p,...){
      qexp(p,...)
    }
    
    cdf_ekwg <- function(cdf,par,x,...){
      a <- par[1]
      b <- par[2]
      c <- par[3]
      npar <- c(...)
    
      (1 - (1 - cdf(par = npar, x = x)^a)^b)^c
    
    }
    
    pdf_ekwg <- function(cdf, pdf, par, x, ...){
    
      a <- par[1]
      b <- par[2]
      c <- par[3]
      npar <- c(...)
      g = pdf(par = npar, x = x)
      G = cdf(par = npar, x = x)
      a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
    }
    
    sample_ekwg <- function(QG, n, par, ...){
      a <- par[1]
      b <- par[2]
      c <- par[3]
      u <- runif(n = n, min = 0, max = 1)
      p <- (1 - (1 - u^(1/c))^(1/b))^(1/a)
      QG(p = p, ...)
    }
    
    cdf_ekwg <- function(cdf,par,x,...){
      a <- par[1]
      b <- par[2]
      c <- par[3]
      npar <- c(...)
      (1 - (1 - cdf(par = npar, x = x)^a)^b)^c
    
    }
    
    pdf_ekwg <- function(cdf, pdf, par, x, ...){
      a <- par[1]
      b <- par[2]
      c <- par[3]
      npar <- c(...)
      g = pdf(par = npar, x = x)
      G = cdf(par = npar, x = x)
      a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
    }
    
    loglikelihood <- function(cdf, pdf, par, x, ...){
      -sum(log(pdf_ekwg(cdf = cdf, pdf = pdf, par = par, x = x, ...)))
    }
    
    G = Gexp
    g = gexp
    data = sample_ekwg(QG = QGexp, n = 550, par = c(1,1,1), rate = 1.5)
    starts = c(1,1,1)
    
    start = Sys.time()
    for(i in 1:1000){
      loglikelihood(G, g, starts, data, 1.5)
    }
    Sys.time() - start
    

    编辑 - 更优化Julia代码

    我决定不传递Gexpgexp,而是直接称他们为:

    function pdf_ekwg(x, par0, par1)
        a = par0[1]
        b = par0[2]
        c = par0[3]
        g = gexp(x, par1)
        G = Gexp(x, par1)
        a*b*c*g*G^(a-1)*(1-G^a)^(b-1)*(1-(1-G^a)^b)^(c-1)
    end
    

    然后时间好了2倍:

    julia> experiment(data, par0, par1)
      0.061860 seconds
    

答案 1 :(得分:1)

请注意,R和Julia计算的迭代次数不同。这意味着在相同的迭代次数中,Julia执行更多函数调用,但更接近解决方案。所以你报告的差异并不令人惊讶。

以下是一个显示此内容的最小示例:

朱莉娅

julia> using Optim

julia> function f(x)
           println(x)
           sum(x.^2)
       end
f (generic function with 1 method)

julia> optimize(f, [10.0, 10.0, -10.0], Optim.Options(iterations = 10))
[10.0, 10.0, -10.0]
[15.025, 10.0, -10.0]
[10.0, 15.025, -10.0]
[10.0, 10.0, -14.975]
[13.35, 4.975, -13.3167]
[7.20833, 6.65, -15.5278]
[10.3722, 4.41667, -10.9213]
[10.4963, 2.55556, -9.57006]
[5.11975, 7.8287, -10.0819]
[2.37634, 8.77994, -9.00364]
[8.04009, 7.57366, -3.52135]
[8.31734, 7.88155, 0.480788]
[4.12665, 2.81136, -2.06194]
[2.16887, 0.41515, 0.584081]
[-1.92127, 8.82887, 4.27755]
[3.33362, 2.63711, 12.5652]
[2.57577, 7.50018, -4.51012]
[-6.43509, 3.28125, -0.246445]
[0.794297, -1.36448, -7.05921]
[-1.15731, 0.777307, -2.24052]
Results of Optimization Algorithm
 * Algorithm: Nelder-Mead
 * Starting Point: [10.0,10.0,-10.0]
 * Minimizer: [2.1688665345729614,0.4151501295534299, ...]
 * Minimum: 5.217482e+00
 * Iterations: 10
 * Convergence: false
   *  √(Σ(yᵢ-ȳ)²)/n < 1.0e-08: false
   * Reached Maximum Number of Iterations: true
 * Objective Calls: 20

- [R

> f <- function(x) {
+     print(x)
+     sum(x^2)
+ }
> 
> optim(c(10, 10, -10), f, control=list(maxit=10))
[1]  10  10 -10
[1]  11  10 -10
[1]  10  11 -10
[1] 10 10 -9
[1]  9.000000 10.666667 -9.333333
[1]  9.5 10.5 -9.5
[1]  9.333333  9.444444 -8.888889
[1]  9.000000  8.666667 -8.333333
[1]  8.666667  9.555556 -7.777778
[1]  9.000000  9.666667 -8.333333
[1]  9.444444  8.148148 -7.407407
[1]  9.666667  6.888889 -6.444444
$`par`
[1]  9.000000  8.666667 -8.333333

$value
[1] 225.5556

$counts
function gradient 
      12       NA 

$convergence
[1] 1

$message
NULL

答案 2 :(得分:0)

为了公平地比较R语言的AKSequencer函数和Julia的AKSequencer = AKSequencer()函数 Optim 包,我考虑了 Nelder-Mead optim中最大 500 次迭代和收敛容差的方法。这是标准的R optimize函数。

<强>朱莉娅

1e^-8

<强> - [R

optim

我认为比较现在比我做的第一次比较更公平。我在R中采用的相同编程实践也在Julia中进行了考虑。例如,在R中,我没有考虑优化代码,我只是实现了它。

输出朱莉娅

using Distributions
using Optim

gexp(x,λ) = λ * exp(-λ * x)
Gexp(x,λ) = 1.0 - exp(-λ * x)
QGexp(x,λ) = quantile.(Exponential(1/λ), x)

function sample_ekwg(QG, n, par0, par1)
    a = par0[1]
    b = par0[2]
    c = par0[3]
    u = rand(n)
    p = (1 - (1 - u.^(1/c)).^(1/b)).^(1/a)
    QG(p, par1)
end

function pdf_ekwg(cdf, pdf, x, par0, par1)
    a = par0[1]
    b = par0[2]
    c = par0[3]
    g = pdf(x, par1)
    G = cdf(x, par1)
    a*b*c*g*G^(a-1)*(1-G^a)^(b-1)*(1-(1-G^a)^b)^(c-1)
end

function loglike(cdf, pdf, x, par0, par1)
  soma = 0.0
  for v in x
      soma += log(pdf_ekwg(cdf, pdf, v, par0, par1))
  end
  return -soma
end

par0 = [1.0,1.0,1.0]
par1 = 1.5
n = 20
srand(0)
data = sample_ekwg(QGexp, 5000, par0,par1)

function experiment(G, g, data, par0, par1, n)
  result = Vector(length(par0)*n)
  @time for i = 1:n
       result[3i - 2 : 3i] = optimize(par0 -> loglike(G, g, data, par0, par1),
                                      par0, Optim.Options(iterations = 500, g_tol = 1e-8)).minimizer
       loglike(G, g, data, par0, par1)
  end
  return reshape(result,length(par0),n)'
end

experiment(Gexp, gexp, data, par0, par1, n)

输出R

Gexp <- function(par,x){
  lambda <- par[1]
  pexp(q = x, rate = lambda, lower.tail = TRUE, log.p = FALSE)
}

gexp <- function(par,x){
  lambda <- par[1]
  dexp(x = x, rate = lambda, log = FALSE)
}

QGexp <- function(p,...){
  qexp(p,...)
}

cdf_ekwg <- function(cdf,par,x,...){
  a <- par[1]
  b <- par[2]
  c <- par[3]
  npar <- c(...)

  (1 - (1 - cdf(par = npar, x = x)^a)^b)^c

}

pdf_ekwg <- function(cdf, pdf, par, x, ...){

  a <- par[1]
  b <- par[2]
  c <- par[3]
  npar <- c(...)
  g = pdf(par = npar, x = x)
  G = cdf(par = npar, x = x)
  a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
}

sample_ekwg <- function(QG, n, par, ...){
  a <- par[1]
  b <- par[2]
  c <- par[3]
  u <- runif(n = n, min = 0, max = 1)
  p <- (1 - (1 - u^(1/c))^(1/b))^(1/a)
  QG(p = p, ...)
}

cdf_ekwg <- function(cdf,par,x,...){
  a <- par[1]
  b <- par[2]
  c <- par[3]
  npar <- c(...)
  (1 - (1 - cdf(par = npar, x = x)^a)^b)^c

}

pdf_ekwg <- function(cdf, pdf, par, x, ...){
  a <- par[1]
  b <- par[2]
  c <- par[3]
  npar <- c(...)
  g = pdf(par = npar, x = x)
  G = cdf(par = npar, x = x)
  a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
}

loglikelihood <- function(cdf, pdf, par, x, ...){
  -sum(log(pdf_ekwg(cdf = cdf, pdf = pdf, par = par, x = x, ...)))
}

G = Gexp
g = gexp
set.seed(0)
data = sample_ekwg(QG = QGexp, n = 5e3, par = c(1,1,1), rate = 1.5)
starts = c(1,1,1)

start = Sys.time()
for(i in 1:20){
  result <- optim(par = starts, fn = loglikelihood, x = data, cdf = G,
                    pdf = g, method = "Nelder-Mead",rate = 1.5)
}
Sys.time() - start