我很难找到并行拟合高斯混合EM聚类模型的正确方法。
我有一台 12 核的 Windows 机器,想充分利用这些核心。
到目前为止,我已经参考了以下链接:
Implement Parallel for loops in Python
下面是我想要并行运行的 EM 聚类模型实现。如果嵌套循环不便并行,我可以只让外层循环并行运行。
# Sequential model selection for a Gaussian mixture fitted with EM: try every
# combination of initialisation method, covariance type and component count,
# and keep the fit with the lowest BIC.
maxCLust = 18
from sklearn import mixture

# np.infty was removed in NumPy 2.0; np.inf is the supported spelling.
lowest_bic = np.inf
bic = []
n_components_range = range(1, maxCLust)  # 1 .. maxCLust - 1 components
cv_types = ['spherical', 'tied', 'diag', 'full']
methods_in = ['kmeans', 'random']
# Convert the data once instead of on every fit/score call.
X_array = np.asarray(X)
for method_in in methods_in:
    for cv_type in cv_types:
        for n_components in n_components_range:
            # Fit a Gaussian mixture with EM
            gmm = mixture.GaussianMixture(n_components=n_components,
                                          covariance_type=cv_type,
                                          init_params=method_in)
            gmm.fit(X_array)
            print(n_components)
            bic.append(gmm.bic(X_array))
            # Strict "<" keeps the earliest fit when two scores tie.
            if bic[-1] < lowest_bic:
                lowest_bic = bic[-1]
                best_gmm = gmm
bic = np.array(bic)
bestEMmodel = best_gmm
labels = bestEMmodel.predict(X)
# Bare expressions: they display their value in an interactive session or
# notebook cell and have no effect when run as a script.
bestEMmodel.n_components
labels
这是我最近的一次尝试:
# Standard-library process pool, used to spread work across CPU cores.
import multiprocessing as mp
# CPU count as reported by multiprocessing.
nprocs = mp.cpu_count()
print(f"Number of CPU cores: {nprocs}")
# One worker process per reported CPU.
# NOTE(review): the pool is started here, before GMMbic is defined further
# down and outside any `if __name__ == "__main__":` guard. The multiprocessing
# docs require that guard when the spawn start method is used (the default on
# Windows) -- confirm how this file is launched.
pool = mp.Pool(processes=nprocs)
from sklearn import mixture
def GMMbic(n_components_range, myData):
    """Return the Gaussian mixture with the lowest BIC over a parameter grid.

    Every combination of initialisation method ('kmeans', 'random'),
    covariance type ('spherical', 'tied', 'diag', 'full') and component
    count is fitted with EM and scored with BIC on ``myData``.

    Args:
        n_components_range: Iterable of component counts to try. A plain
            integer ``n`` is also accepted and is treated as
            ``range(1, n)``, i.e. an exclusive upper bound.
        myData: Array-like training data; converted once with
            ``np.asarray`` and used for both fitting and scoring.

    Returns:
        The fitted ``GaussianMixture`` with the lowest BIC, or ``None`` if
        no fit produced a BIC below infinity.

    Raises:
        ValueError: If there is no component count to try.
    """
    # An int is not iterable, so interpret it as an exclusive upper bound
    # rather than failing inside the loop with a TypeError.
    if isinstance(n_components_range, (int, np.integer)):
        candidates = list(range(1, n_components_range))
    else:
        candidates = list(n_components_range)
    if not candidates:
        # Fail early and clearly instead of reaching the return statement
        # with best_gmm never assigned.
        raise ValueError(
            "n_components_range must contain at least one component count"
        )

    data = np.asarray(myData)  # convert once, not on every fit/score call
    cv_types = ['spherical', 'tied', 'diag', 'full']
    methods_in = ['kmeans', 'random']
    # np.infty was removed in NumPy 2.0; np.inf is the supported spelling.
    lowest_bic = np.inf
    best_gmm = None
    for method_in in methods_in:
        for cv_type in cv_types:
            for n_components in candidates:
                # Fit a Gaussian mixture with EM
                gmm = mixture.GaussianMixture(n_components=n_components,
                                              covariance_type=cv_type,
                                              init_params=method_in)
                gmm.fit(data)
                score = gmm.bic(data)
                # Strict "<" keeps the earliest fit when two scores tie.
                if score < lowest_bic:
                    lowest_bic = score
                    best_gmm = gmm
    return best_gmm
# pool.apply blocks on a single task, so it never uses more than one worker,
# and GMMbic iterates over its first argument, so it needs a sequence rather
# than the bare int 18. Submit one task per component count instead; starmap
# distributes them across the pool and returns the results in input order.
_component_counts = range(1, 18)
_best_per_count = pool.starmap(GMMbic, [([k], X) for k in _component_counts])
# Reduce to the single overall winner by BIC. `results` stays a one-element
# list holding the best fitted model.
_X_array = np.asarray(X)
results = [min(_best_per_count, key=lambda model: model.bic(_X_array))]