使用预先计算的KDTree是否有一种优雅的方式来查找连接组件的数量?现在使用带有k近邻的KDTree给出的邻接矩阵的先呼吸搜索算法找到连通分量,但是有更好的可能性吗?
import collections
import numpy as np
from sklearn import neighbors
N = 100
N_k = 8
ra = np.random.random
X0,X1 = ra(N),ra(N)
X0[0:N//2]+= 2
X1[0:N//2]+= 2
X = np.array([X0,X1]).T
tree = neighbors.KDTree(X)
dist, adj = tree.query(X, k = N_k+1)
dist = dist[:,1::]
adj = adj[:,1::]
print("Inside of find_components_lifo")
print("N = %d/ N_k = %d"%(N,N_k))
labels = np.zeros(N, dtype = np.int) - 1
n = 0
steps = 0
remains = (labels == -1)
while n < N:
i = np.arange(0,N,1)[remains][np.random.randint(0,N - n)]
# This is important for directed graphs
labels[i] = i
lifo = collections.deque([i])
while lifo:
ele = lifo.pop()
for k in adj[ele,:]:
if labels[k] == -1:
labels[k] = labels[i]
lifo.append(k)
elif labels[k] != labels[i]:
labels[labels == labels[i]] = labels[k]
remains = (labels == -1)
n = N - len(np.nonzero(remains)[0])
unique = np.unique(labels)
labels_ = np.zeros(N, dtype = np.int) - 1
for i, label in enumerate(unique):
choice = (labels == label)
N_cl = len(np.nonzero(choice)[0])
print("We found a cluster with N = %d"%N_cl)
labels_[choice] = i
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
fixticks(ax)
plt.show()
colors_ = np.array(colors)
for i in range(N):
for j in range(N_k):
ax.plot([X0[i],X0[adj[i,j]]],[X1[i],X1[adj[i,j]]], color = colors_[labels_[i]], alpha = 0.3)
ax.grid(True)
ax.scatter(X0,X1, s = 60, color = "Black")
plt.show()
答案 0 :(得分:2)
我认为你可以一起使用scipy的connected_components
和scikit-learn的kneighbors_graph
。这会产生你想要的吗?
from sklearn import neighbors
from scipy.sparse import csgraph
adj = neighbors.kneighbors_graph(X, N_k)
n_components, labels = csgraph.connected_components(adj)