我的问题大致如下:给定一个数值矩阵 X,其中每一行代表一个样本。我希望对每一行,在除其自身之外的所有行中,找到 L2 距离最近的邻居。我读过官方文档,但仍然不清楚该如何实现。有人可以给我一些提示吗?
我的代码如下
"""
    l2_dist(v1, v2)

Return the Euclidean (L2) distance between `v1` and `v2`: the square root of
the sum of the squared element-wise differences.
"""
function l2_dist(v1, v2)
    delta = v1 - v2
    squared = delta .^ 2
    return sqrt(sum(squared))
end
"""
    main(Mat, dist_fun)

For every row of `Mat`, find the other row that is closest under `dist_fun`.

# Arguments
- `Mat`: matrix whose rows are the items to compare.
- `dist_fun`: function called as `dist_fun(row_a, row_b)` on two row copies;
  its result is compared with `<` and stored as `Float64`.

# Returns
A `Dict` with two `SharedArray`s of length `size(Mat, 1)`:
- `"Dist"`: distance from row `i` to its nearest other row (`Inf` when there is
  no other row).
- `"Id"`: index of that nearest row (`0` when there is no other row).
"""
function main(Mat, dist_fun)
    n = size(Mat, 1)
    Dist = SharedArray{Float64}(n)
    Id = SharedArray{Int64}(n)
    # No separate initialisation pass: every slot is written exactly once at the
    # end of its own iteration below. The previous un-synchronised `@parallel`
    # init loop could still be storing Inf/0 while the search was running.
    Threads.@threads for i in 1:n
        # Track the running minimum in locals so the shared arrays are touched
        # once per row instead of on every improvement.
        best_dist = Inf
        best_id = 0
        # Row i is invariant over the inner loop; copy it once.
        row_i = Mat[i, :]
        for j in 1:n
            # A row is never its own neighbour.
            i == j && continue
            dist_temp = dist_fun(row_i, Mat[j, :])
            if dist_temp < best_dist
                best_dist = dist_temp
                best_id = j
            end
        end
        Dist[i] = best_dist
        Id[i] = best_id
    end
    return Dict("Dist" => Dist, "Id" => Id)
end
# Benchmark input: an n-by-p matrix of uniform random numbers in [0, 1)
n = 4000
p = 30
X = rand(n, p);
# Run once on the first 30 rows before timing (presumably a warm-up so that
# compilation is not included in the measurement below)
main(X[1:30, :], l2_dist)
# Time the search over the full matrix and keep the result in N
@time N = main(X, l2_dist)
我想把各个 i(即每一行最小值的计算)分发到不同的核心上。但上面的版本显然无法正常工作,甚至比顺序版本还慢。有人能给我指出正确的方向吗?谢谢。
答案 0 :(得分:2)
除了你写下的内容之外,你可能还做了别的事情;但就我目前所看到的而言,你实际上并没有进行任何并行计算。Julia 要求你告诉它可以使用多少个处理器(或线程)。你可以通过以下方式做到这一点:
- 使用多个处理器启动 Julia:`julia -p #`(其中 # 是你希望 Julia 可以使用的处理器数量)。
- 启动 Julia 会话之后,可以调用 `addprocs` 函数添加其他处理器。
- 要使用多个线程,需要在启动 Julia 之前执行 `export JULIA_NUM_THREADS=#`(等号两侧不能有空格)。我不太了解线程,所以我会坚持使用 `@parallel` 宏。我建议阅读 documentation 了解有关线程的更多细节——也许 @Chris Rackauckas 可以进一步说明两者的差异。
以下是关于我的代码和你的代码的几点说明:
- 我使用的 Julia 版本是 `0.6.1-pre.0`。我认为我没有用到 0.6 特有的功能,但以防万一先说明一下。
代码如下:
# Make sure all processors have access to Distances package
@everywhere using Distances
# Dimensions of the random test matrix
nrow, ncol = 30, 4000
# Fix the RNG seed so that every run builds the same matrix
srand(42)
X = rand(nrow, ncol)
"""
    main(X::AbstractMatrix{Float64}, M::Distances.Metric)

For every column of `X`, find the other column that is closest under metric `M`.

The outer loop over columns runs under `@sync @parallel`, and results are
written into `SharedArray`s.

# Returns
A tuple `(dist_vec, ind_vec)` of `SharedArray`s with one entry per column:
`dist_vec[i]` is the smallest distance from column `i` to any other column and
`ind_vec[i]` is the index of that column. With a single column the entries stay
at their initial values `Inf` and `-1`.
"""
function main(X::AbstractMatrix{Float64}, M::Distances.Metric)
    # Only the number of columns is needed
    ncols = size(X, 2)

    # Output buffers, one slot per column
    ind_vec = SharedArray{Int}(ncols)
    dist_vec = SharedArray{Float64}(ncols)

    # `@sync` blocks until every iteration of the parallel loop has finished
    @sync @parallel for i in 1:ncols
        # View of column i, reused for every comparison
        col_i = view(X, :, i)
        best_dist = Inf
        best_ind = -1

        for j in 1:ncols
            # A column is never compared with itself
            if j != i
                candidate = evaluate(M, col_i, view(X, :, j))
                # Keep the candidate only when it is strictly closer
                if candidate < best_dist
                    best_dist, best_ind = candidate, j
                end
            end
        end

        # Record the nearest neighbour of column i
        dist_vec[i] = best_dist
        ind_vec[i] = best_ind
    end

    return dist_vec, ind_vec
end
# Using Euclidean metric
metric = Euclidean()
# `main` returns `(dist_vec, ind_vec)`: distances first, indices second.
# Bind them in that order so each name matches its contents; with the names
# swapped, `dist` held the integer indices and `inds` the distances, and any
# output recorded that way shows the two vectors in the opposite positions.
dist, inds = main(X, metric)
# Time a second call, after the call above has already run `main` once
@time main(X, metric);
# Spot-check a few columns: nearest distance and nearest column index
@show dist[[1, 5, 25]], inds[[1, 5, 25]]
您可以使用 1 个处理器运行代码:`julia testfile.jl`
% julia testfile.jl
0.640365 seconds (16.00 M allocations: 732.495 MiB, 3.70% gc time)
(dist[[1, 5, 25]], inds[[1, 5, 25]]) = ([2541, 2459, 1602], [1.40892, 1.38206, 1.32184])
使用 n 个处理器(本例中为 4)运行:`julia -p n testfile.jl`
% julia -p 4 testfile.jl
0.201523 seconds (2.10 k allocations: 99.107 KiB)
(dist[[1, 5, 25]], inds[[1, 5, 25]]) = ([2541, 2459, 1602], [1.40892, 1.38206, 1.32184])