Question

我正在尝试学习PyMC3，我想做一个简单的高斯混合示例。我找到了this示例并希望将其转换为pymc3，但我在尝试绘制traceplot时遇到错误。

n1 = 500
n2 = 200
n = n1+n2

mean1 = 21.8
mean2 = 42.0

precision = 0.1

sigma = np.sqrt(1 / precision)

# precision = 1/sigma^2
print "sigma1: %s" % sigma1
print "sigma2: %s" % sigma2

data1 = np.random.normal(mean1,sigma,n1)
data2 = np.random.normal(mean2,sigma,n2)

data = np.concatenate([data1 , data2])
#np.random.shuffle(data)

fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, xlabel='x', ylabel='y', title='mixture of 2    guassians')
ax.plot(range(0,n1+n2), data, 'x', label='data')
plt.legend(loc=0)

with pm.Model() as model:
    #priors
    p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2

    ber = pm.Bernoulli( "ber", p = p) # produces 1 with proportion p.

    precision = pm.Gamma('precision', alpha=0.1, beta=0.1)

    mean1 = pm.Normal( "mean1", 0, 0.01 ) #better to use normals versus Uniforms (unless you are certain the value is  truncated at 0 and 200 
    mean2 = pm.Normal( "mean2", 0, 0.01 )

    mean = pm.Deterministic('mean', ber*mean1 + (1-ber)*mean2)

    process = pm.Normal('process', mu=mean, tau=precision, observed=data)

    # inference
    step = pm.Metropolis()
    trace = pm.sample(10000, step)
    pm.traceplot(trace)

错误：

sigma1: 3.16227766017
sigma2: 1.69030850946
 [-----------------100%-----------------] 10000 of 10000 complete in 4.4 sec
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-10-eb728824de83> in <module>()
     44     step = pm.Metropolis()
     45     trace = pm.sample(10000, step)
---> 46     pm.traceplot(trace)

/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in traceplot(trace, vars, figsize, lines, combined, grid)
     70                 ax[i, 0].set_xlim(mind - .5, maxd + .5)
     71             else:
---> 72                 kdeplot_op(ax[i, 0], d)
     73             ax[i, 0].set_title(str(v))
     74             ax[i, 0].grid(grid)

/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in kdeplot_op(ax, data)
     94     for i in range(data.shape[1]):
     95         d = data[:, i]
---> 96         density = kde.gaussian_kde(d)
     97         l = np.min(d)
     98         u = np.max(d)

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in __init__(self, dataset, bw_method)
    186 
    187         self.d, self.n = self.dataset.shape
--> 188         self.set_bandwidth(bw_method=bw_method)
    189 
    190     def evaluate(self, points):

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in set_bandwidth(self, bw_method)
    496             raise ValueError(msg)
    497 
--> 498         self._compute_covariance()
    499 
    500     def _compute_covariance(self):

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in _compute_covariance(self)
    507             self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
    508                                                bias=False))
--> 509             self._data_inv_cov = linalg.inv(self._data_covariance)
    510 
    511         self.covariance = self._data_covariance * self.factor**2

/usr/lib64/python2.7/site-packages/scipy/linalg/basic.pyc in inv(a, overwrite_a, check_finite)
    381         inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1)
    382     if info > 0:
--> 383         raise LinAlgError("singular matrix")
    384     if info < 0:
    385         raise ValueError('illegal value in %d-th argument of internal '

LinAlgError: singular matrix

Answer 1

感谢Fonnesbeck在github问题跟踪器上回答这个问题：

https://github.com/pymc-devs/pymc3/issues/452

这是更新的代码：

with pm.Model() as model:
    #priors
    p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2

    ber = pm.Bernoulli( "ber", p = p, shape=len(data)) # produces 1 with proportion p.

    sigma = pm.Uniform('sigma', 0, 100)
    precision = sigma**-2

    mean = pm.Normal( "mean", 0, 0.01, shape=2 )

    mu = pm.Deterministic('mu', mean[ber])

    process = pm.Normal('process', mu=mu, tau=precision, observed=data)

with model:
    step1 = pm.Metropolis([p, sigma, mean])
    step2 = pm.BinaryMetropolis([ber])
    trace = pm.sample(10000, [step1, step2])

在推断伯努利随机变量时需要使用BinaryMetropolis

Answer 2

更简单，更快捷的版本如下：

with pm.Model() as model2:
    p = pm.Beta( "p", 1., 1.)    
    means = pm.Uniform('mean', 15, 60, shape=2)
    sigma = pm.Uniform('sigma', 0, 20, testval=5)

    process = pm.NormalMixture('obs', tt.stack([p, 1-p]), means, sd=sigma, observed=data)

with model2:
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step)

Answer 3

我知道这个问题很老，但我正在尝试使用PyMC3用法的不同例子来习惯PyMC3中的建模。上面给出的答案在PyMC3的当前版本1.0中不起作用（它没有正确地区分这两种方法）。为了使其发挥作用，我必须做的最小的改变如下：

1)
# mean = pm.Normal("mean", 0, 0.01, shape=2 )
mean = pm.Uniform('mean', 15, 60, shape=2)
2)
# step2 = pm.BinaryMetropolis([ber])
step2 = pm.ElemwiseCategorical(vars=[ber], values=[0, 1])

以防其他人遇到类似问题。

将高斯混合物转换为PyMC3

3 个答案: