我已经构建了两个语言几乎完全相同的语言 R 和 Julia 。
事先我知道有一些方法可以改善Julia代码的性能,因为我没有声明类型,而且通常非向量化代码往往更有效。但是,Julia代码和 R 代码都在相同的地形上进行比较。
注意:我开始学习朱莉娅一周,所以你可以说我没有用朱莉娅编程得很好。
我注意到Optim包提供了optimize()
功能。与默认安装的统计数据包的 R 语言的optim()
功能相比,这是非常慢的 R 语言。
以下是代码 R 和 Julia :
R代码:
rm(list=ls(all=TRUE))
Gexp <- function(par,x){
lambda <- par[1]
pexp(q = x, rate = lambda, lower.tail = TRUE, log.p = FALSE)
}
gexp <- function(par,x){
lambda <- par[1]
dexp(x = x, rate = lambda, log = FALSE)
}
QGexp <- function(p,...){
qexp(p,...)
}
Gweibull <- function(par,x){
alpha <- par[1]
beta <- par[2]
pweibull(q = x, shape = alpha, scale = beta, lower.tail = TRUE, log.p = FALSE)
}
QGweibull <- function(p,...){
qweibull(p,...)
}
# Função de distribuição acumulada Exponentiated Kw-G class (EKw-G)
cdf_ekwg <- function(cdf,par,x,...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
(1 - (1 - cdf(par = npar, x = x)^a)^b)^c
}
# cdf_ekwg(cdf = G, par = c(0.2,0.4,0.21), x = 1, alpha = 1.1, beta = 1.2, lambda = 1)
# Função densidade de probabilidade Exponentiated Kw-G class (EKw-G)
pdf_ekwg <- function(cdf, pdf, par, x, ...){
a <- par[1]
b <- par[2]
c <- par[3]
#cdf_ekwg_locale <- function(x){
# cdf_ekwg(cdf = cdf, par = par, x, ...)
#}
npar <- c(...)
g = pdf(par = npar, x = x)
G = cdf(par = npar, x = x)
a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
#numDeriv::grad(func = cdf_ekwg_locale, x = x, method = "simple")
}
#integrate(f = pdf_ekwg, par = c(1,1,1.5), lower = 0, upper = Inf, cdf = Gexp, pdf = gexp,
# lambda = 1.5)
# Será fixado os parâmetros de G. Serão estimados os parâmetros a, b e c do
# modelo EKwG.
sample_ekwg <- function(QG, n, par, ...){
a <- par[1]
b <- par[2]
c <- par[3]
u <- runif(n = n, min = 0, max = 1)
p <- (1 - (1 - u^(1/c))^(1/b))^(1/a)
QG(p = p, ...)
}
# Função de distribuição acumulada Exponentiated Kw-G class (EKw-G)
cdf_ekwg <- function(cdf,par,x,...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
(1 - (1 - cdf(par = npar, x = x)^a)^b)^c
}
# cdf_ekwg(cdf = G, par = c(0.2,0.4,0.21), x = 1, alpha = 1.1, beta = 1.2, lambda = 1)
# Função densidade de probabilidade Exponentiated Kw-G class (EKw-G)
pdf_ekwg <- function(cdf, pdf, par, x, ...){
a <- par[1]
b <- par[2]
c <- par[3]
#cdf_ekwg_locale <- function(x){
# cdf_ekwg(cdf = cdf, par = par, x, ...)
#}
npar <- c(...)
g = pdf(par = npar, x = x)
G = cdf(par = npar, x = x)
a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
#numDeriv::grad(func = cdf_ekwg_locale, x = x, method = "simple")
}
# integrate(f = pdf_ekwg, par = c(1.4,1.3,0.5), lower = 0, upper = Inf, cdf = G, alpha = 1.1, beta = 1.2)
loglikelihood <- function(cdf, pdf, par, x, ...){
-sum(log(pdf_ekwg(cdf = cdf, pdf = pdf, par = par, x = x, ...)))
}
myoptim <- function(...) tryCatch(optim(...), error = function(e) NA)
G = Gexp
g = gexp
data = sample_ekwg(QG = QGexp, n = 550, par = c(1,1,1), rate = 1.5)
starts = c(1,1,1)
set.seed(0)
start = Sys.time()
for(i in 1:5){
result <- myoptim(par = starts, fn = loglikelihood, x = data, cdf = G,
pdf = g, method = "Nelder-Mead",rate = 1.5)
}
Sys.time() - start
Julia代码:
using Distributions
#using Cubature # Calculo de integrais numéricas.
#using Plots
using Optim
#using JuMP
#using NLopt
function gexp(x,par)
λ = par[1]
λ * exp(-λ * x)
end
# valor = hquadrature(x -> gexp(x,1), 0, 100)[1]
function Gexp(x,par)
λ = par[1]
1- exp(-λ * x)
end
function QGexp(x,par)
λ = par[1]
# A função Exponential no pacote Distributions é reparametrizada
# como 1/lambda. Dessa forma, para trabalhar com densidade na forma
# λ * exp(-λ*x) é preciso tomar 1/λ.
quantile.(Exponential(1/λ),x)
end
function sample_ekwg(QG, n, par0, par1...)
a = par0[1]
b = par0[2]
c = par0[3]
u = rand(n)
p = (1 - (1 - u.^(1/c)).^(1/b)).^(1/a)
QG(p, par1...)
end
# Função de distribuição acumulada Exponentiated Kw-G class (EKw-G)
function cdf_ekwg(cdf, x, par0, par1...)
a = par0[1]
b = par0[2]
c = par0[3]
(1 - (1 - cdf.(x,par1...).^a).^b).^c
end
# Função densidade de probabilidade Exponentiated Kw-G class (EKw-G)
function pdf_ekwg(cdf, pdf, x, par0, par1...)
a = par0[1]
b = par0[2]
c = par0[3]
g = pdf(x, par1...)
G = cdf(x, par1...)
a * b * c * g * G.^(a-1) * (1-G.^a).^(b-1) * (1 - (1-G.^a).^b).^(c-1)
end
# valor = hquadrature(x ->
# pdf_ekwg(Gexp,gexp, x, [1,1,1], 1), 0, 100)[1]
function loglike(cdf, pdf, x, par0, par1...)
n = length(x)
soma = 0
for i = 1:n
soma += log(pdf_ekwg(cdf, pdf, x[i], par0, par1...))
end
return -soma # Queremos minimizar loglike.
end
G = Gexp
g = gexp
data = sample_ekwg(QGexp, 550, [1,1,1],1.5)
starts = [1,1,1]
par0 = [1,1,1]
par1 = [1.5]
srand(0) # set seed.
@time for i = 1:5
optimize(par0 -> loglike(G, g, data, par0, par1), [1.3,1.2,2.1])
end
执行这两个代码的机器配置如下:
注意:代码不能使用相同的示例。但是我相信这不会证明计算时间的巨大差异。
在我的硬件中, R 代码成本 0.6508956 秒, Julia 代码成本 27.180257 秒。
有谁知道如何让代码Julia比R代码运行得更快? 我想要一个简单的解决方案,因为它承诺在Julia中具有很好的计算性能,而不需要太多的编程理解。看,在R中没有做太多的事情来证明代码Julia中的主要修正。
最好的问候。
答案 0 :(得分:3)
在上述讨论的总结中,这里有几个问题:
optimize
的调用未包含在也减慢计算的函数中loglike
定义为整数,因此soma
不是类型稳定的
loglike
不同的次数(由于它们的配置不同) - 所以最好将给定数量的调用基准测试到loglike
- 我选择了1000以下下面我发布一个清理过的Julia代码和R代码,它们应该做同样的工作,而Julia的速度要快2倍。在Julia中预编译之后的时间是:
julia> experiment(Gexp, gexp, data, par0, par1)
0.112414 seconds (2.75 M allocations: 41.992 MiB, 3.38% gc time)
和R是
> start = Sys.time()
> for(i in 1:1000){
+ loglikelihood(G, g, starts, data, 1.5)
+ }
> Sys.time() - start
Time difference of 0.2812479 secs
以下是清理后的代码(我希望在删除不必要的部分时没有错误:) - 所以请检查我是否在某处搞砸了。)
using Distributions
gexp(x,λ) = λ * exp(-λ * x)
Gexp(x,λ) = 1.0 - exp(-λ * x)
QGexp(x,λ) = quantile.(Exponential(1/λ), x)
function sample_ekwg(QG, n, par0, par1)
a = par0[1]
b = par0[2]
c = par0[3]
u = rand(n)
p = (1 - (1 - u.^(1/c)).^(1/b)).^(1/a)
QG(p, par1)
end
function pdf_ekwg(cdf, pdf, x, par0, par1)
a = par0[1]
b = par0[2]
c = par0[3]
g = pdf(x, par1)
G = cdf(x, par1)
a*b*c*g*G^(a-1)*(1-G^a)^(b-1)*(1-(1-G^a)^b)^(c-1)
end
function loglike(cdf, pdf, x, par0, par1)
soma = 0.0
for v in x
soma += log(pdf_ekwg(cdf, pdf, v, par0, par1))
end
return -soma
end
par0 = [1.0,1.0,1.0]
par1 = 1.5
data = sample_ekwg(QGexp, 550, par0,par1)
function experiment(G, g, data, par0, par1)
@time for i = 1:1000
loglike(G, g, data, par0, par1)
end
end
experiment(Gexp, gexp, data, par0, par1)
Gexp <- function(par,x){
lambda <- par[1]
pexp(q = x, rate = lambda, lower.tail = TRUE, log.p = FALSE)
}
gexp <- function(par,x){
lambda <- par[1]
dexp(x = x, rate = lambda, log = FALSE)
}
QGexp <- function(p,...){
qexp(p,...)
}
cdf_ekwg <- function(cdf,par,x,...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
(1 - (1 - cdf(par = npar, x = x)^a)^b)^c
}
pdf_ekwg <- function(cdf, pdf, par, x, ...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
g = pdf(par = npar, x = x)
G = cdf(par = npar, x = x)
a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
}
sample_ekwg <- function(QG, n, par, ...){
a <- par[1]
b <- par[2]
c <- par[3]
u <- runif(n = n, min = 0, max = 1)
p <- (1 - (1 - u^(1/c))^(1/b))^(1/a)
QG(p = p, ...)
}
cdf_ekwg <- function(cdf,par,x,...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
(1 - (1 - cdf(par = npar, x = x)^a)^b)^c
}
pdf_ekwg <- function(cdf, pdf, par, x, ...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
g = pdf(par = npar, x = x)
G = cdf(par = npar, x = x)
a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
}
loglikelihood <- function(cdf, pdf, par, x, ...){
-sum(log(pdf_ekwg(cdf = cdf, pdf = pdf, par = par, x = x, ...)))
}
G = Gexp
g = gexp
data = sample_ekwg(QG = QGexp, n = 550, par = c(1,1,1), rate = 1.5)
starts = c(1,1,1)
start = Sys.time()
for(i in 1:1000){
loglikelihood(G, g, starts, data, 1.5)
}
Sys.time() - start
我决定不传递Gexp
和gexp
,而是直接称他们为:
function pdf_ekwg(x, par0, par1)
a = par0[1]
b = par0[2]
c = par0[3]
g = gexp(x, par1)
G = Gexp(x, par1)
a*b*c*g*G^(a-1)*(1-G^a)^(b-1)*(1-(1-G^a)^b)^(c-1)
end
然后时间好了2倍:
julia> experiment(data, par0, par1)
0.061860 seconds
答案 1 :(得分:1)
请注意,R和Julia计算的迭代次数不同。这意味着在相同的迭代次数中,Julia执行更多函数调用,但更接近解决方案。所以你报告的差异并不令人惊讶。
以下是一个显示此内容的最小示例:
julia> using Optim
julia> function f(x)
println(x)
sum(x.^2)
end
f (generic function with 1 method)
julia> optimize(f, [10.0, 10.0, -10.0], Optim.Options(iterations = 10))
[10.0, 10.0, -10.0]
[15.025, 10.0, -10.0]
[10.0, 15.025, -10.0]
[10.0, 10.0, -14.975]
[13.35, 4.975, -13.3167]
[7.20833, 6.65, -15.5278]
[10.3722, 4.41667, -10.9213]
[10.4963, 2.55556, -9.57006]
[5.11975, 7.8287, -10.0819]
[2.37634, 8.77994, -9.00364]
[8.04009, 7.57366, -3.52135]
[8.31734, 7.88155, 0.480788]
[4.12665, 2.81136, -2.06194]
[2.16887, 0.41515, 0.584081]
[-1.92127, 8.82887, 4.27755]
[3.33362, 2.63711, 12.5652]
[2.57577, 7.50018, -4.51012]
[-6.43509, 3.28125, -0.246445]
[0.794297, -1.36448, -7.05921]
[-1.15731, 0.777307, -2.24052]
Results of Optimization Algorithm
* Algorithm: Nelder-Mead
* Starting Point: [10.0,10.0,-10.0]
* Minimizer: [2.1688665345729614,0.4151501295534299, ...]
* Minimum: 5.217482e+00
* Iterations: 10
* Convergence: false
* √(Σ(yᵢ-ȳ)²)/n < 1.0e-08: false
* Reached Maximum Number of Iterations: true
* Objective Calls: 20
> f <- function(x) {
+ print(x)
+ sum(x^2)
+ }
>
> optim(c(10, 10, -10), f, control=list(maxit=10))
[1] 10 10 -10
[1] 11 10 -10
[1] 10 11 -10
[1] 10 10 -9
[1] 9.000000 10.666667 -9.333333
[1] 9.5 10.5 -9.5
[1] 9.333333 9.444444 -8.888889
[1] 9.000000 8.666667 -8.333333
[1] 8.666667 9.555556 -7.777778
[1] 9.000000 9.666667 -8.333333
[1] 9.444444 8.148148 -7.407407
[1] 9.666667 6.888889 -6.444444
$`par`
[1] 9.000000 8.666667 -8.333333
$value
[1] 225.5556
$counts
function gradient
12 NA
$convergence
[1] 1
$message
NULL
答案 2 :(得分:0)
为了公平地比较R语言的AKSequencer
函数和Julia的AKSequencer = AKSequencer()
函数 Optim 包,我考虑了 Nelder-Mead optim
中最大 500 次迭代和收敛容差的方法。这是标准的R optimize
函数。
<强>朱莉娅强>
1e^-8
<强> - [R 强>
optim
我认为比较现在比我做的第一次比较更公平。我在R中采用的相同编程实践也在Julia中进行了考虑。例如,在R中,我没有考虑优化代码,我只是实现了它。
输出朱莉娅
using Distributions
using Optim
gexp(x,λ) = λ * exp(-λ * x)
Gexp(x,λ) = 1.0 - exp(-λ * x)
QGexp(x,λ) = quantile.(Exponential(1/λ), x)
function sample_ekwg(QG, n, par0, par1)
a = par0[1]
b = par0[2]
c = par0[3]
u = rand(n)
p = (1 - (1 - u.^(1/c)).^(1/b)).^(1/a)
QG(p, par1)
end
function pdf_ekwg(cdf, pdf, x, par0, par1)
a = par0[1]
b = par0[2]
c = par0[3]
g = pdf(x, par1)
G = cdf(x, par1)
a*b*c*g*G^(a-1)*(1-G^a)^(b-1)*(1-(1-G^a)^b)^(c-1)
end
function loglike(cdf, pdf, x, par0, par1)
soma = 0.0
for v in x
soma += log(pdf_ekwg(cdf, pdf, v, par0, par1))
end
return -soma
end
par0 = [1.0,1.0,1.0]
par1 = 1.5
n = 20
srand(0)
data = sample_ekwg(QGexp, 5000, par0,par1)
function experiment(G, g, data, par0, par1, n)
result = Vector(length(par0)*n)
@time for i = 1:n
result[3i - 2 : 3i] = optimize(par0 -> loglike(G, g, data, par0, par1),
par0, Optim.Options(iterations = 500, g_tol = 1e-8)).minimizer
loglike(G, g, data, par0, par1)
end
return reshape(result,length(par0),n)'
end
experiment(Gexp, gexp, data, par0, par1, n)
输出R
Gexp <- function(par,x){
lambda <- par[1]
pexp(q = x, rate = lambda, lower.tail = TRUE, log.p = FALSE)
}
gexp <- function(par,x){
lambda <- par[1]
dexp(x = x, rate = lambda, log = FALSE)
}
QGexp <- function(p,...){
qexp(p,...)
}
cdf_ekwg <- function(cdf,par,x,...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
(1 - (1 - cdf(par = npar, x = x)^a)^b)^c
}
pdf_ekwg <- function(cdf, pdf, par, x, ...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
g = pdf(par = npar, x = x)
G = cdf(par = npar, x = x)
a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
}
sample_ekwg <- function(QG, n, par, ...){
a <- par[1]
b <- par[2]
c <- par[3]
u <- runif(n = n, min = 0, max = 1)
p <- (1 - (1 - u^(1/c))^(1/b))^(1/a)
QG(p = p, ...)
}
cdf_ekwg <- function(cdf,par,x,...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
(1 - (1 - cdf(par = npar, x = x)^a)^b)^c
}
pdf_ekwg <- function(cdf, pdf, par, x, ...){
a <- par[1]
b <- par[2]
c <- par[3]
npar <- c(...)
g = pdf(par = npar, x = x)
G = cdf(par = npar, x = x)
a * b * c * g * G^(a-1) * (1-G^a)^(b-1) * (1 - (1-G^a)^b)^(c-1)
}
loglikelihood <- function(cdf, pdf, par, x, ...){
-sum(log(pdf_ekwg(cdf = cdf, pdf = pdf, par = par, x = x, ...)))
}
G = Gexp
g = gexp
set.seed(0)
data = sample_ekwg(QG = QGexp, n = 5e3, par = c(1,1,1), rate = 1.5)
starts = c(1,1,1)
start = Sys.time()
for(i in 1:20){
result <- optim(par = starts, fn = loglikelihood, x = data, cdf = G,
pdf = g, method = "Nelder-Mead",rate = 1.5)
}
Sys.time() - start