在pymc3中对包含多次试验的数据使用分类分布(Categorical)

时间:2017-03-16 23:42:21

标签: python bayesian pymc pymc3

我试图在一个实验的多次试验(即重复运行)中检测转换点(转换点在各次试验之间可以不同)。下面是一个用虚拟数据说明我想做的事情的例子:

import numpy as np
import pymc3 as pm
import theano.tensor as tt

# Synthetic data set: 2 independent trials (rows) with 500 observations each,
# every observation being one of the categories 0, 1 or 2.
# In both trials the first 250 draws use uniform probabilities and the
# remaining 250 draws use (1/6, 2/3, 1/6), i.e. the switch sits at index 250.
outcomes = np.arange(3)
uniform_probs = [1/3, 1/3, 1/3]
peaked_probs = [1/6, 2/3, 1/6]

data = np.zeros((2, 500))
for trial in range(2):
    # Draw order is trial by trial, pre-switch segment first, so a seeded
    # random stream is consumed in the same sequence as four explicit calls.
    data[trial, :250] = np.random.choice(outcomes, size=250, p=uniform_probs)
    data[trial, 250:] = np.random.choice(outcomes, size=250, p=peaked_probs)

with pm.Model() as model:
    # Emission probabilities: 2 'states', each with 3 possible emissions.
    p = pm.Dirichlet('p', np.ones(3), shape=(2, 3))
    switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=500)

    # One state-index vector per trial: 0 wherever switchpoint >= time index,
    # 1 otherwise. Both trials share the same switchpoint variable.
    time_index = np.arange(500)
    state = [tt.switch(switchpoint >= time_index, 0, 1) for _ in range(2)]

    # NOTE: this is the call that raises the ValueError reported in the
    # traceback; it is intentionally left as written.
    obs = pm.Categorical('obs', p = p[state], observed = data)

然而,分类分布似乎不能在同一数据集中处理多次重复试验:当该分布试图对概率进行归一化(使其和为1)时,我得到如下错误:

ValueError                                Traceback (most recent call last)
<ipython-input-27-7ccdbe0bf9c9> in <module>()
      1 with model:
----> 2         obs = pm.Categorical('obs', p = p[state], observed = data)

/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
     34                 raise TypeError("observed needs to be data but got: {}".format(type(data)))
     35             total_size = kwargs.pop('total_size', None)
---> 36             dist = cls.dist(*args, **kwargs)
     37             return model.Var(name, dist, data, total_size)
     38         else:

/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
     45     def dist(cls, *args, **kwargs):
     46         dist = object.__new__(cls)
---> 47         dist.__init__(*args, **kwargs)
     48         return dist
     49 

/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/discrete.py in __init__(self, p, *args, **kwargs)
    433             self.k = tt.shape(p)[-1]
    434         self.p = p = tt.as_tensor_variable(p)
--> 435         self.p = (p.T / tt.sum(p, -1)).T
    436         self.mode = tt.argmax(p)
    437 

/home/narendra/anaconda3/lib/python3.6/site-packages/theano/tensor/var.py in __truediv__(self, other)
    202 
    203     def __truediv__(self, other):
--> 204         return theano.tensor.basic.true_div(self, other)
    205 
    206     def __floordiv__(self, other):

/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    666                 thunk.outputs = [storage_map[v] for v in node.outputs]
    667 
--> 668                 required = thunk()
    669                 assert not required  # We provided all inputs
    670 

/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in rval()
    881 
    882         def rval():
--> 883             fill_storage()
    884             for o in node.outputs:
    885                 compute_map[o][0] = True

/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/cc.py in __call__(self)
   1705                 print(self.error_storage, file=sys.stderr)
   1706                 raise
-> 1707             reraise(exc_type, exc_value, exc_trace)
   1708 
   1709 

/home/narendra/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
    684         if value.__traceback__ is not tb:
    685             raise value.with_traceback(tb)
--> 686         raise value
    687 
    688 else:

ValueError: Input dimension mis-match. (input[0].shape[1] = 500, input[1].shape[1] = 2)

有没有办法在pymc3中处理跨多次试验的分类发射(categorical emissions)?对每次试验分别进行分析对我来说不可行,因为我想构建一个层次模型:实际数据在许多次重复试验中会在不同类型的状态之间切换,而每个状态都有自己的分类发射概率。

0 个答案:

没有答案