使用PyMC3求Dirichlet先验

时间:2017-03-23 09:48:41

标签: python pymc3 frequency-distribution dirichlet statistical-sampling

如何使用PyMC3求出Dirichlet先验(Dirichlet priors)?

我尝试过以下方法:

import pymc3 as pm
import numpy as np

# Counts, one entry per category (15 values in total).
population = [139212, 70192, 50000, 21000, 16000, 5000, 2000, 500, 600, 100, 10, 5, 5, 5, 5]

with pm.Model() as model:
    # Declares 'zipfy' as an *observed* Dirichlet variable: the concentration
    # vector `a` has length 1, and the raw counts are passed as observed data.
    # NOTE(review): since the only variable in the model is observed, there
    # appear to be no free variables left for the sampler to work on — verify.
    zipfy = pm.Dirichlet('zipfy', a=np.array([1.]), observed=population)
    # Request 100 draws; no step method is passed, so pm.sample chooses one.
    tr = pm.sample(100)

但它抛出了ValueError:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-14-623c21a4f35f> in <module>()
      1 with pm.Model() as model:
      2     zipfy = pm.Dirichlet('zipfy', a=np.array([1.]), observed=population)
----> 3     tr = pm.sample(100)

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain, njobs, tune, progressbar, model, random_seed)
    147         # By default, use NUTS sampler
    148         pm._log.info('Auto-assigning NUTS sampler...')
--> 149         start_, step = init_nuts(init=init, n_init=n_init, model=model)
    150         if start is None:
    151             start = start_

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/pymc3/sampling.py in init_nuts(init, n_init, model, **kwargs)
    432 
    433     if init == 'advi':
--> 434         v_params = pm.variational.advi(n=n_init)
    435         start = pm.variational.sample_vp(v_params, 1, progressbar=False, hide_transformed=False)[0]
    436         cov = np.power(model.dict_to_array(v_params.stds), 2)

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/pymc3/variational/advi.py in advi(vars, start, model, n, accurate_elbo, optimizer, learning_rate, epsilon, random_seed)
    122     # Create variational gradient tensor
    123     elbo, shared = _calc_elbo(vars, model, n_mcsamples=n_mcsamples,
--> 124                               random_seed=random_seed)
    125 
    126     # Set starting values

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/pymc3/variational/advi.py in _calc_elbo(vars, model, n_mcsamples, random_seed)
    179     logpt = tt.add(*map(tt.sum, factors))
    180 
--> 181     [logp], inarray = pm.join_nonshared_inputs([logpt], vars, shared)
    182 
    183     uw = tt.vector('uw')

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/pymc3/theanof.py in join_nonshared_inputs(xs, vars, shared, make_shared)
    180     inarray : vector of inputs
    181     """
--> 182     joined = tt.concatenate([var.ravel() for var in vars])
    183 
    184     if not make_shared:

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/theano/tensor/basic.py in concatenate(tensor_list, axis)
   4608             "or a list, make sure you did not forget () or [] around "
   4609             "arguments of concatenate.", tensor_list)
-> 4610     return join(axis, *tensor_list)
   4611 
   4612 

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/theano/tensor/basic.py in join(axis, *tensors_list)
   4357         return tensors_list[0]
   4358     else:
-> 4359         return join_(axis, *tensors_list)
   4360 
   4361 

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    613         """
    614         return_list = kwargs.pop('return_list', False)
--> 615         node = self.make_node(*inputs, **kwargs)
    616 
    617         if config.compute_test_value != 'off':

/Users/liling.tan/Library/Python/3.5/lib/python/site-packages/theano/tensor/basic.py in make_node(self, *axis_and_tensors)
   4080         axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
   4081         if not tensors:
-> 4082             raise ValueError('Cannot join an empty list of tensors')
   4083         as_tensor_variable_args = [as_tensor_variable(x) for x in tensors]
   4084 

ValueError: Cannot join an empty list of tensors

编辑(补充):

zipfy应该是参数向量。

我试过这个:

import pymc3 as pm
import numpy as np
# Counts, one entry per category (15 values), used below as the Dirichlet
# concentration vector.
population = [139212, 70192, 50000, 21000, 16000, 5000, 2000, 500, 600, 100, 10, 5, 5, 5, 5]
with pm.Model() as model:
    # 'zipfy' is declared without `observed`, with the counts themselves as
    # the concentration parameters `a`.
    zipfy = pm.Dirichlet('zipfy', a=np.array(population))
    # Request 100 draws from the model.
    tr = pm.sample(100)

# Print every sampled value of 'zipfy' stored in the trace.
print (tr['zipfy'])
# Print the number of draws, then the length of a single draw.
print (len(tr['zipfy']), len(tr['zipfy'][0]) )

[OUT]:

array([[  4.57466959e-01,   2.30024576e-01,   1.63655813e-01, ...,
      2.79491587e-05,   1.15471055e-05,   1.21639409e-05],
   [  4.57466959e-01,   2.30024576e-01,   1.63655813e-01, ...,
      2.79491587e-05,   1.15471055e-05,   1.21639409e-05],
   [  4.57550769e-01,   2.30182026e-01,   1.63985544e-01, ...,
      1.61401840e-05,   2.90679821e-05,   2.47148304e-05],
   ..., 
   [  4.56878341e-01,   2.31362382e-01,   1.63956669e-01, ...,
      1.04361219e-05,   5.42454872e-06,   2.51727193e-05],
   [  4.57542706e-01,   2.30122065e-01,   1.63973662e-01, ...,
      2.16784018e-05,   5.42709076e-06,   7.35651589e-06],
   [  4.56698065e-01,   2.30786537e-01,   1.64125287e-01, ...,
      7.14659176e-06,   2.90901488e-05,   1.87413211e-05]])

   (100, 15)

我原本期望得到一组Dirichlet先验(即一个参数数组),也就是大小为1的结果,但实际得到的大小是100。这是预期的行为吗?应该如何解释tr['zipfy'](即采样得到的trace)的输出?啊,trace是不是就是pm.sample(100)的各个采样步骤?那先验是保存在model对象里面吗?

那么,给定一个离散值,比如说5,我怎样才能得到刚刚通过采样器学到的Dirichlet先验?它是在model对象里,还是在zipfy对象里?

0 个答案:

没有答案