我想用"对角高斯"函数
exp(-32*(u^2 + 16*(v^2 + w^2)))
的值来初始化一个三维张量(多维数组),其中 u = 1/sqrt(3)*(x+y+z),v、w 是与 u 正交的任意两个坐标,该函数在 [-1,1]^3 上的均匀网格上离散化。以下代码实现了这一目标:
# Build an n-by-n-by-n Float64 array holding
#     exp(-32*(u^2 + 16*(v^2 + w^2)))
# sampled on a uniform grid of n points per axis over [-1, 1]^3, where (u, v, w)
# are the grid coordinates expressed in the orthonormal basis given by the
# columns of `q`.
#
# NOTE(review): this targets a pre-1.0 Julia. `linspace`, `Array(T, dims)` and
# the `thin` keyword of `qr` are not available in Julia 1.x.
function gaussian3d(n)
    # Local helper; kept as a nested generic function on purpose.
    square(x) = x*x
    # Full 3x3 Q factor of the all-ones column. Its first column is parallel to
    # (1,1,1), and the other two columns span the orthogonal complement.
    q = qr(ones(3,1), thin=false)[1]
    grid = linspace(-1., 1., n)
    out = Array(Float64, (n, n, n))
    # Expands to three nested loops; i_d runs over the indices of `out` along
    # dimension d, with i_1 innermost.
    Base.@nloops 3 i out begin
        @inbounds begin
            u = q[1,1]*grid[i_1] + q[2,1]*grid[i_2] + q[3,1]*grid[i_3]
            v = q[1,2]*grid[i_1] + q[2,2]*grid[i_2] + q[3,2]*grid[i_3]
            w = q[1,3]*grid[i_1] + q[2,3]*grid[i_2] + q[3,3]*grid[i_3]
            out[i_1,i_2,i_3] = exp(-32*(square(u) + 16*(square(v) + square(w))))
        end
    end
    return out
end
然而,这段代码似乎很慢。例如,如果把上面定义的函数换成 exp(x*y*z),代码的运行速度会提高 50 倍。此外,@time 宏报告上述代码约有 20% 的时间花在 GC(垃圾回收)上,我不明白这些开销来自何处。(这些数值是在 n = 128 时得到的。)因此我的问题是:
答案 0 :(得分:2)
我并不了解以"对角高斯"取值的三维张量,但结合原帖中关于 square 的评论、为 q 加上类型标注(@code_warntype 在这里很有帮助:性能大幅提升!),并进一步特化 @nloops,下面的代码在我试过的平台上运行得快得多。
julia> square(x::Float64) = x * x  # top-level helper: returns x*x; the method only accepts Float64
square (generic function with 1 method)
julia> function my_gaussian3d(n)
           # Same tensor as gaussian3d: exp(-32*(u^2 + 16*(v^2 + w^2))) on a
           # uniform n-point grid per axis over [-1, 1]^3. It differs in two ways:
           #   * `q` carries a concrete type annotation;
           #   * the @nloops pre-expression reads each grid coordinate once per
           #     loop level instead of indexing the grid inside the formula.
           # NOTE(review): targets a pre-1.0 Julia (`linspace`, `Array(T, dims)`
           # and the `thin` keyword of `qr` are not available in Julia 1.x).
           q::Array{Float64,2} = qr(ones(3,1), thin=false)[1]
           grid = linspace(-1., 1., n)
           out = Array(Float64, (n, n, n))
           # c_d = grid[i_d] is evaluated at the top of loop level d (i_1 innermost).
           Base.@nloops 3 i out d->c_d=grid[i_d] begin
               @inbounds begin
                   u = q[1,1]*c_1 + q[2,1]*c_2 + q[3,1]*c_3
                   v = q[1,2]*c_1 + q[2,2]*c_2 + q[3,2]*c_3
                   w = q[1,3]*c_1 + q[2,3]*c_2 + q[3,3]*c_3
                   out[i_1,i_2,i_3] = exp(-32*(square(u) + 16*(square(v) + square(w))))
               end
           end
           return out
       end
my_gaussian3d (generic function with 1 method)
julia> @time gaussian3d(128);
elapsed time: 3.952389337 seconds (1264 MB allocated, 4.50% gc time in 57 pauses with 0 full sweep)
julia> @time gaussian3d(128);
elapsed time: 3.527316699 seconds (1264 MB allocated, 4.42% gc time in 58 pauses with 0 full sweep)
julia> @time my_gaussian3d(128);
elapsed time: 0.285837566 seconds (16 MB allocated)
julia> @time my_gaussian3d(128);
elapsed time: 0.28476448 seconds (16 MB allocated, 1.22% gc time in 0 pauses with 0 full sweep)
julia> my_gaussian3d(128) == gaussian3d(128)
true