我想用sklearn拟合2分量混合模型,然后计算后验后验概率。但是到目前为止,我使用的代码对两种发行版之一的拟合是完美的(过度拟合吗?),而另一种则非常差。我做了一个虚拟示例,采样了2个高斯
import numpy as np
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
def calc_pdf():
"""
calculate gauss mixture modelling for 2 comp
return pdfs
"""
d = np.random.normal(-0.1, 0.07, 5000)
t = np.random.normal(0.2, 0.13, 10000)
pool = np.concatenate([d, t]).reshape(-1,1)
label = ['d']*d.shape[0] + ['t'] * t.shape[0]
X = pool[pool>0].reshape(-1,1)
X = np.log(X)
clf = GaussianMixture(
n_components=2,
covariance_type='full',
tol = 1e-24,
max_iter = 1000
)
logprob = clf.fit(X).score_samples(X)
responsibilities = clf.predict_proba(X)
pdf = np.exp(logprob)
pdf_individual = responsibilities * pdf[:, np.newaxis]
plot_gauss(np.log(d), np.log(t), pdf_individual, X)
return pdf_individual[0], pdf_individual[1]
def plot_gauss(d, t, pdf_individual, x):
fig, ax = plt.subplots(figsize=(12, 9), facecolor='white')
ax.hist(d, 30, density=True, histtype='stepfilled', alpha=0.4)
ax.hist(t, 30, density=True, histtype='stepfilled', alpha=0.4)
ax.plot(x, pdf_individual, '.')
ax.set_xlabel('$x$')
ax.set_ylabel('$p(x)$')
plt.show()
calc_pdf()
在此处生成此图
我明显缺少什么吗?