Question

我想用 sklearn 拟合一个 2 分量的高斯混合模型，然后计算后验概率。但到目前为止，我使用的代码对两个分布中的一个拟合得非常完美（是过拟合吗？），而对另一个则拟合得很差。我做了一个虚拟示例，从 2 个高斯分布中采样：

import numpy as np
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt 

def calc_pdf():
    """Fit a 2-component Gaussian mixture to log-transformed samples.

    Draws two normal samples, pools them, keeps only the strictly positive
    values, takes their natural log, and fits a two-component
    ``GaussianMixture`` to the result. The weighted per-component densities
    are then plotted over histograms of the two log-transformed samples.

    Returns:
        A tuple ``(pdf_0, pdf_1)``: the weighted density of each mixture
        component, evaluated at every fitted sample (one value per sample).

    Note:
        The fitted data are logs of positive-truncated normal samples, which
        are not Gaussian in log space, so the mixture can only approximate
        them.
    """
    d = np.random.normal(-0.1, 0.07, 5000)
    t = np.random.normal(0.2, 0.13, 10000)
    pool = np.concatenate([d, t])

    # The log is only defined for positive values, so drop everything else
    # before transforming; sklearn expects a 2-D (n_samples, 1) array.
    X = np.log(pool[pool > 0]).reshape(-1, 1)

    clf = GaussianMixture(
        n_components=2,
        covariance_type='full',
        # Extremely tight tolerance: in practice EM runs until the lower
        # bound stops changing or max_iter is reached.
        tol=1e-24,
        max_iter=1000,
    )
    logprob = clf.fit(X).score_samples(X)
    responsibilities = clf.predict_proba(X)

    # Mixture density times the posterior of each component gives the
    # weighted density of that component, shape (n_samples, 2).
    pdf = np.exp(logprob)
    pdf_individual = responsibilities * pdf[:, np.newaxis]

    # Apply the same positivity filter to the individual samples so the
    # histograms are not fed NaN from the log of negative values.
    plot_gauss(np.log(d[d > 0]), np.log(t[t > 0]), pdf_individual, X)

    # Columns index components; rows index samples.
    return pdf_individual[:, 0], pdf_individual[:, 1]

def plot_gauss(d, t, pdf_individual, x):
    """Plot two sample histograms with the component densities on top.

    Args:
        d: Values of the first sample, drawn as a normalized histogram.
        t: Values of the second sample, drawn as a normalized histogram.
        pdf_individual: Density values plotted against ``x`` as points.
        x: Abscissa values at which ``pdf_individual`` was evaluated.
    """
    figure, axes = plt.subplots(facecolor='white', figsize=(12, 9))

    # Both histograms share the same bin count and styling.
    hist_style = dict(density=True, histtype='stepfilled', alpha=0.4)
    for sample in (d, t):
        axes.hist(sample, 30, **hist_style)

    axes.plot(x, pdf_individual, '.')
    axes.set_xlabel('$x$')
    axes.set_ylabel('$p(x)$')
    plt.show()
# Run the demo: fit the mixture and display the plot.
calc_pdf()

上述代码生成了下面这张图：

我是不是遗漏了什么明显的东西？

sklearn 的 GMM 无法很好地拟合 2 个高斯分布

0 个答案: