ValueError: ill-defined empirical covariance when running a Gaussian Mixture Model

Asked: 2017-07-24 14:39:18

Tags: python-3.x machine-learning scikit-learn

I get the following error when running a Gaussian Mixture Model:

ValueError: Fitting the mixture model failed because some components have ill-defined empirical covariance (for instance caused by singleton or collapsed samples). Try to decrease the number of components, or increase reg_covar.
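
The message itself points at two knobs: fewer components, or a larger reg_covar (the small value added to the diagonal of each covariance matrix, 1e-6 by default). For reference, this is a minimal sketch of how those would be set on the estimator; the concrete values below are placeholders, not settings I have verified on my data:

from sklearn import mixture

# Sketch only: n_components and reg_covar here are illustrative placeholders.
gmm = mixture.GaussianMixture(n_components=2,           # try fewer components
                              covariance_type="full",
                              reg_covar=1e-3,            # default is 1e-6
                              max_iter=20,
                              random_state=0)
# gmm.fit(x_train) would then be called exactly as in the snippet further down.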

The matrix I am working with is rather large, so it is hard to show it in full on this page, but here is a preview:

[[  6.10086000e+05   1.58787000e+05   0.00000000e+00 ...,   8.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  2.36273000e+05   1.48953000e+05   0.00000000e+00 ...,   5.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  1.70486000e+05   1.53083000e+05   0.00000000e+00 ...,   3.50000000e+01
    0.00000000e+00   0.00000000e+00]
 ..., 
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00 ...,   2.00000000e+01
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00 ...,   2.00000000e+00
    0.00000000e+00   1.00000000e+00]]
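
Since the preview already shows many all-zero rows and columns, a quick diagnostic along the lines below might reveal whether some features are (near-)constant or some samples are effectively empty, which is the "singleton or collapsed samples" situation the error mentions. This is only a sketch: data is the same dict returned by matricize() in the snippet below, and the variance threshold is arbitrary.

import numpy as np

X = data["data"]      # same arrays as used in perform_gaussian_mixture() below
y = data["target"]

# Features whose variance is essentially zero across the whole data set
print("near-constant features:", np.where(X.var(axis=0) < 1e-12)[0])

# Samples that are entirely zero ("collapsed" rows)
print("all-zero rows:", int(np.sum(~X.any(axis=1))))

# Per-class sample counts; a class with very few samples can also yield
# an ill-defined empirical covariance
for label in np.unique(y):
    print(label, int(np.sum(y == label)))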

Here is the code snippet that runs the GMM:

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn import mixture
from sklearn.model_selection import train_test_split


def perform_gaussian_mixture():
    colors = ["navy", "cyan", "darkorange", "orchid", "lime"]

    def make_ellipses(gmm, ax):
        # Draw a covariance ellipse over the first two features of each component.
        for n, color in enumerate(colors):
            if gmm.covariance_type == 'full':
                covariances = gmm.covariances_[n][:2, :2]
            elif gmm.covariance_type == 'tied':
                covariances = gmm.covariances_[:2, :2]
            elif gmm.covariance_type == 'diag':
                covariances = np.diag(gmm.covariances_[n][:2])
            elif gmm.covariance_type == 'spherical':
                covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n]
            v, w = np.linalg.eigh(covariances)
            u = w[0] / np.linalg.norm(w[0])
            angle = np.arctan2(u[1], u[0])
            angle = 180 * angle / np.pi  # convert to degrees
            v = 2. * np.sqrt(2.) * np.sqrt(v)
            ell = mpl.patches.Ellipse(gmm.means_[n, :2], v[0], v[1],
                                      180 + angle, color=color)
            ell.set_clip_box(ax.bbox)
            ell.set_alpha(0.5)
            ax.add_artist(ell)

    # matricize() is a project-specific helper returning a dict with
    # "data", "target" and "target_names" entries.
    data = matricize()

    x_train, x_test, y_train, y_test = train_test_split(data["data"], data["target"], test_size=0.25, random_state=42)

    n_classes = len(np.unique(data["target"]))

    # Try GMMs using different types of covariances.
    estimators = dict((cov_type, mixture.GaussianMixture(n_components=n_classes,
                                                         covariance_type=cov_type, max_iter=20, random_state=0))
                      for cov_type in ["full"])

    n_estimators = len(estimators)

    # max(..., 1) keeps the layout valid when only one covariance type is used.
    plt.figure(figsize=(3 * max(n_estimators // 2, 1), 6))
    plt.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05,
                        left=.01, right=.99)

    for index, (name, estimator) in enumerate(estimators.items()):

        # Since we have class labels for the training data, we can
        # initialize the GMM parameters in a supervised manner.
        estimator.means_init = np.array([x_train[np.where(y_train == i)].mean(axis=0)
                                         for i in range(n_classes)])

        # Train the other parameters using the EM algorithm.
        estimator.fit(x_train)

        h = plt.subplot(2, max(n_estimators // 2, 1), index + 1)
        make_ellipses(estimator, h)

        for n, color in enumerate(colors):
            d = data["data"][np.where(data["target"] == n)]
            plt.scatter(d[:, 0], d[:, 1], s=0.8, color=color,
                        label=data["target_names"][n])
        # Plot the test data with crosses
        for n, color in enumerate(colors):
            d = x_test[np.where(y_test == n)]
            plt.scatter(d[:, 0], d[:, 1], marker='x', color=color)

        y_train_pred = estimator.predict(x_train)
        train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
        plt.text(0.05, 0.9, 'Train accuracy: %.1f' % train_accuracy,
                 transform=h.transAxes)

        y_test_pred = estimator.predict(x_test)
        test_accuracy = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
        plt.text(0.05, 0.8, 'Test accuracy: %.1f' % test_accuracy,
                 transform=h.transAxes)

        plt.xticks(())
        plt.yticks(())
        plt.title(name)

    plt.legend(scatterpoints=1, loc='lower right', prop=dict(size=12))
    plt.show()
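
One variant I have considered but not yet verified is whether a less flexible covariance_type helps, since "diag" and "spherical" estimate far fewer parameters per component than "full", possibly combined with a larger reg_covar. The estimators dict above would then look roughly like this (same names as in the function; the reg_covar value is a placeholder):

estimators = dict((cov_type, mixture.GaussianMixture(n_components=n_classes,
                                                     covariance_type=cov_type,
                                                     reg_covar=1e-3,  # larger than the 1e-6 default
                                                     max_iter=20,
                                                     random_state=0))
                  for cov_type in ["spherical", "diag", "tied", "full"])

With several covariance types the 2-by-(n_estimators // 2) subplot grid in the function is also used as originally intended.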

Please let me know if more information is needed.

0 Answers