我看到过不少关于 Julia 内存分配的一般性问题,但那些示例都没能帮到我。我在这里提供一个最小示例来说明我的问题。我实现了一个有限体积求解器,用于计算对流方程的解。长话短说,下面是(自包含的)代码:
"""
    dummyexample()

Set up a 100-point grid on the unit interval, initialise `ρ` with `sin(2πx)`,
advance it with repeated calls to `shu_osher_step!`, and print the `Δx`-weighted
sum of absolute differences between the final `ρ` and the initial profile.
"""
function dummyexample()
    ncells = 100
    Δx = 1.0/ncells
    x = range(Δx/2.0, length=ncells, step=Δx)
    ρ = sin.(2π*x)

    # Number of time-loop iterations (a Float64 value; the range below accepts it).
    nsteps = floor(1.0/Δx / 0.5)
    for _ in 1:nsteps
        shu_osher_step!(ρ) # This part is executed several times
    end

    weighted_error = Δx*abs.(ρ .- sin.(2π*x))
    println(sum(weighted_error))
end
"""
    shu_osher_step!(ρ::AbstractArray)

Advance `ρ` in place by one three-stage step built from `euler_step`:
the second stage blends `ρ` with an Euler step using weights 3/4 and 1/4, and the
final value blends `ρ` with another Euler step using weights 1/3 and 2/3.
Returns `ρ`.
"""
function shu_osher_step!(ρ::AbstractArray)
    stage₁ = euler_step(ρ)                # allocates
    predictor = euler_step(stage₁)        # allocates
    stage₂ = (3.0/4.0) .* ρ .+ (1.0/4.0) .* predictor
    corrector = euler_step(stage₂)        # allocates
    ρ .= (1.0/3.0) .* ρ .+ (2.0/3.0) .* corrector
    return ρ
end
"""
    euler_step(ρ::AbstractArray)

Return a new array equal to `ρ` plus `0.5` times `rhs(ρ)`; `ρ` is not modified.
"""
euler_step(ρ::AbstractArray) = ρ .+ 0.5 .* rhs(ρ)
"""
    rhs(ρ::AbstractArray)

Build the reconstructed values `ρ + H(Δρₗ, Δρᵣ)/2` from the periodic left/right
differences of `ρ`, and return the negated difference between each reconstructed
value and its periodic left neighbour. Every intermediate is a freshly allocated
array.
"""
function rhs(ρ::AbstractArray)
    left_neighbour = circshift(ρ, 1)
    right_neighbour = circshift(ρ, -1)
    backward_diff = ρ .- left_neighbour
    forward_diff = right_neighbour .- ρ
    reconstructed = ρ .+ 0.5 .* H(backward_diff, forward_diff)
    shifted = circshift(reconstructed, 1)
    return -(reconstructed .- shifted)
end
"""
    H(Δρₗ::AbstractArray, Δρᵣ::AbstractArray)

Element-wise compute `Δρₗ * (2/3 / σ̃ + 1/3)`, where `σ = Δρₗ / Δρᵣ` and `σ̃` is
`σ` with its magnitude clamped to at least `1e-12` (sign `+` when `σ >= 0`,
`-` otherwise). Entries where the ratio is `NaN` (e.g. `0/0`) use `σ̃ = 1e-12`.

Works for arrays of any length; returns a new array shaped like the inputs.
"""
function H(Δρₗ::AbstractArray, Δρᵣ::AbstractArray)
    σ = Δρₗ ./ Δρᵣ
    σ̃ = max.(abs.(σ), 1e-12) .* (2.0 .* (σ .>= 0.0) .- 1.0)
    # A 0/0 ratio yields NaN, which survives the clamp above. Iterate over the
    # array's own indices rather than a fixed 1:100 so that shorter inputs do not
    # go out of bounds and longer inputs do not keep NaNs in their tail.
    for i in eachindex(σ̃)
        if isnan(σ̃[i])
            σ̃[i] = 1e-12
        end
    end
    return Δρₗ .* (2.0/3.0*(1.0 ./ σ̃) .+ 1.0/3.0)
end
我的问题是:位于调用树最深处的函数 rhs 在最外层时间循环的每次迭代中都会分配若干数组。这些数组只是临时的,我不希望每次迭代都必须重新分配它们。下面是 @time 的输出:
julia> include("dummyexample.jl");
julia> @time dummyexample()
8.780349744014917e-5 # <- just to check that the error is almost zero
0.362833 seconds (627.38 k allocations: 39.275 MiB, 1.95% gc time)
在真实代码中,实际上有一个结构体 p 会沿着整个调用树传递,它包含我在此处硬编码的那些属性(基本上,每个显式写出的数字都会改为通过 p.n 之类的字段来引用)。
我可能还可以像这样传递预分配的数组,但这似乎很混乱,每当我想要进行额外的计算时,我都必须更改它。
Julia 文档不建议使用全局数组,但在这种情况下它们不是很有用吗?我是否遗漏了其他明显的做法?我使用的是 Julia 1.0。
答案 0 :(得分:1)
可以做两件事:一是预先分配临时数组,并把它们作为参数传给需要它们的函数;二是改写成作用于标量的函数,用显式的循环(周期)下标来代替基于数组的 circshift。
同时应用这两种想法会得到以下结果:
"""
    dummyexample()

Set up a 100-cell grid of cell centres on the unit interval, initialise `ρ` with
`sin(2πx)`, advance it with `run!`, and print the `Δx`-weighted sum of absolute
differences between the final `ρ` and the initial profile.
"""
function dummyexample()
    nx = 100
    Δx = 1.0 / nx
    # `÷` is truncating division, so `2 ÷ Δx` may land one short of the intended
    # count because of floating-point rounding in Δx. Use true division and round.
    steps = round(Int, 1.0 / Δx / 0.5)
    # Likewise `Δx ÷ 2` evaluates to 0.0; the first cell centre is at Δx / 2.
    x = range(Δx / 2, length = nx, step = Δx)
    ρ = sin.(2π .* x)
    run!(ρ, steps)
    println(sum(@. Δx * abs(ρ - sin(2π * x))))
end
"""
    run!(ρ, steps)

Allocate three work arrays shaped like `ρ` once, then call `shu_osher_step!`
once for every element of `1:steps`, reusing those work arrays. Returns `ρ`.
"""
function run!(ρ, steps)
    # The work arrays are created with `similar`, so their contents start undefined.
    stage₁, stage₂, flux = ntuple(_ -> similar(ρ), 3)
    foreach(_ -> shu_osher_step!(stage₁, stage₂, flux, ρ), 1:steps)
    return ρ
end
"""
    shu_osher_step!(ρ₁, ρ₂, v, ρ)

Advance `ρ` in place by one three-stage step, using `ρ₁`, `ρ₂` and `v` as
caller-provided work arrays (all are overwritten). The stages are

    ρ₁ = euler(ρ)
    ρ₂ = 3/4 ρ + 1/4 euler(ρ₁)
    ρ  = 1/3 ρ + 2/3 euler(ρ₂)

where `euler(x)` is what `euler_step!` writes into its first argument.
Returns `ρ`.
"""
function shu_osher_step!(ρ₁, ρ₂, v, ρ)
    euler_step!(ρ₁, v, ρ)

    euler_step!(ρ₂, v, ρ₁)
    ρ₂ .= 3.0/4.0 .* ρ .+ 1.0/4.0 .* ρ₂

    # The last Euler step must not be written into ρ itself: the blend below
    # still needs the old ρ. ρ₁ is no longer needed here, so it serves as scratch.
    euler_step!(ρ₁, v, ρ₂)
    ρ .= 1.0/3.0 .* ρ .+ 2.0/3.0 .* ρ₁
    return ρ
end
"""
    euler_step!(ρₒ, v, ρ)

Write `ρ[i] - 0.5 * (v[i] - v[i-1])` into `ρₒ[i]` for every cell `i`, where
`v[i] = rhs(ρ[i-1], ρ[i], ρ[i+1])` and indices wrap around periodically.
`v` is a work array that is overwritten. Returns `ρₒ`.

`ρₒ` is fully overwritten (its previous contents are never read), so it may be
uninitialised and may be the same array as `ρ`. `v` must be distinct from `ρ`.
Indexing assumes 1-based vectors of equal length.
"""
function euler_step!(ρₒ, v, ρ)
    n = length(ρ)
    cycle(i) = mod1(i, n)  # periodic wrap: 0 -> n, n + 1 -> 1

    # Pass 1: evaluate v in every cell. Doing this before any ρₒ write keeps
    # the update correct even when ρₒ and ρ are the same array.
    for i in 1:n
        v[i] = rhs(ρ[cycle(i - 1)], ρ[i], ρ[cycle(i + 1)])
    end

    # Pass 2: assign (not accumulate) the updated value in every cell, using the
    # difference between v[i] and its periodic left neighbour.
    for i in 1:n
        ρₒ[i] = ρ[i] - 0.5 * (v[i] - v[cycle(i - 1)])
    end
    return ρₒ
end
"""
    rhs(ρₗ, ρᵢ, ρᵣ)

Return `ρᵢ + H(ρᵢ - ρₗ, ρᵣ - ρᵢ) / 2` for the scalar values `ρₗ`, `ρᵢ`, `ρᵣ`.
"""
rhs(ρₗ, ρᵢ, ρᵣ) = ρᵢ + 0.5 * H(ρᵢ - ρₗ, ρᵣ - ρᵢ)
"""
    H(Δρₗ, Δρᵣ)

Return `Δρₗ * (2/3 / σ̃ + 1/3)` for scalar differences, where `σ = Δρₗ / Δρᵣ` and
`σ̃` is `σ` with its magnitude clamped to at least `1e-12` (sign `+` when
`σ >= 0`, `-` otherwise). A `NaN` ratio (e.g. `0/0`) uses `σ̃ = 1e-12`.
"""
function H(Δρₗ, Δρᵣ)
    ratio = Δρₗ / Δρᵣ
    # A NaN ratio fails the comparison and takes the negative branch; the result
    # is still NaN and is replaced just below.
    direction = ratio >= 0.0 ? 1.0 : -1.0
    limited = max(abs(ratio), 1e-12) * direction
    if isnan(limited)
        limited = 1e-12
    end
    return Δρₗ * (2.0 / 3.0 * (1.0 / limited) + 1.0 / 3.0)
end
由于我缺乏领域知识,以上代码可能仍包含一些逻辑错误(dummyexample() 输出 0.02984422033942575),但你应该能看出其中的模式。而且基准测试的结果很好:
julia> @benchmark run!($ρ, $steps)
BenchmarkTools.Trial:
memory estimate: 699.13 KiB
allocs estimate: 799
--------------
minimum time: 3.024 ms (0.00% GC)
median time: 3.164 ms (0.00% GC)
mean time: 3.760 ms (1.69% GC)
maximum time: 57.105 ms (94.41% GC)
--------------
samples: 1327
evals/sample: 1