贝叶斯终身估计使用pymc3 / theano

时间:2015-10-22 16:55:20

标签: python-3.4 bayesian theano pymc3

我正在尝试估算以下模型

enter image description here

我提供了统一的先验enter image description here,我编码了可能性enter image description here。后者来自这个paper,如下所示:

enter image description here

在theano / pymc3实现中,我计算first_termsecond_term中rhs的第一和第二项。最后logp总结了整个样本。

Theano,他自己正在产生一些输出但是当我将它集成到pymc3模型中时会出现以下错误:

TypeError: ('Bad input argument to theano function with name "<ipython-input-90-a5304bf41c50>:27" at index 0(0-based)', 'Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array?')

我认为问题是如何向theano提供pymc3变量。

from pymc3 import Model, Uniform, DensityDist 
import theano.tensor as T
import theano
import numpy as np

p_test, theta_test = .1, .1
X = np.asarray([[1,2,3],[1,2,3]])

### theano test
theano.config.compute_test_value = 'off'
obss = T.matrix('obss')
p, theta = T.scalar('p'), T.scalar('theta')
def first_term(obs, p, theta):
    x, tx, n = obs[0], obs[1], obs[2]
    first_comp = p ** x * (1 - p) ** (n - x) * (1 - theta) ** n
    return(first_comp)

def second_term(obs, p, theta):
    x, tx, n = obs[0], obs[1], obs[2]

    components, updates = theano.scan(
                    lambda t, p, theta, x, tx: p ** x * (1 - theta) ** (tx-x+t) * theta * (1 - theta) ** (tx + t),
                    sequences=theano.tensor.arange(n), non_sequences = [p, theta, x, tx]
    )
    return(components)

def logp(X, p_hat, theta_hat):
    contributions, updates = theano.scan(lambda obs, p, theta: first_term(obs, p, theta) + T.sum( second_term(obs, p, theta) ) ,
                             sequences = obss, non_sequences = [p, theta]
    )
    ll = contributions.sum()
    get_ll = theano.function(inputs = [obss, p, theta], outputs = ll)

    return(get_ll(X, p_hat , theta_hat))

print( logp( X, p_test, theta_test ) ) # It works!

### pymc3 implementation
with Model() as bg_model:

    p = Uniform('p', lower = 0, upper = 1)
    theta = Uniform('theta', lower = 0, upper = .2)


    def first_term(obs, p, theta):
        x, tx, n = obs[0], obs[1], obs[2]
        first_comp = p ** x * (1 - p) ** (n - x) * (1 - theta) ** n
        return(first_comp)

    def second_term(obs, p, theta):
        x, tx, n = obs[0], obs[1], obs[2]

        components, updates = theano.scan(
                        lambda t, p, theta, x, tx: p ** x * (1 - theta) ** (tx-x+t) * theta * (1 - theta) ** (tx + t),
                        sequences=theano.tensor.arange(n), non_sequences = [p, theta, x, tx]
        )
        return(components)

    def logp(X):
        contributions, updates = theano.scan(lambda obs, p, theta: first_term(obs, p, theta) + T.sum( second_term(obs, p, theta) ) ,
                                 sequences = obss, non_sequences = [p, theta]
        )
        ll = contributions.sum()
        get_ll = theano.function(inputs = [obss, p, theta], outputs = ll)
        return(get_ll(X, p, theta))

    y = pymc3.DensityDist('y', logp, observed = X) # Nx4 y = f(f,x,tx,n | p, theta)

我的第一个猜测是使用logp修改return(get_ll(X, p.eval(), theta.eval()))但是theano抱怨图表中遗漏了一些神秘的p_interval。任何线索?

1 个答案:

答案 0 :(得分:3)

我通过以下方式得出结论:i)简化事情ii)避免在编码可能性时使用theano运算符和iii)使用内置包装确定性变量的确定性转换(我的生命周期)。为了加速计算,我通过在rhs上写第二项作为几何级数的解来对矢量化进行了矢量化。这是代码,以防有人想在他自己的终生应用程序上测试它。

from pymc3 import Model, Uniform, Deterministic 
import pymc3
from scipy import optimize
import theano.tensor as T

X = array([[ 5, 64,  8, 13],[ 4, 71, 23, 41],[ 7, 70,  4, 19]) 
#f, n, x, tx where f stands for the frequency of the triple (n, x, tx)

class CustomDist(pymc3.distributions.Discrete):
    def __init__(self, p, theta, *args, **kwargs):
        super(CustomDist, self).__init__(*args, **kwargs)
        self.p = p
        self.theta = theta

    def logp(self, X):
        p = self.theta
        theta = self.theta
        f, n, x, tx = X[0],(X[1] + 1),X[2],(X[3] + 1) #time indexed at 0, x > n
        ll =  f * T.log(  p ** x * (1 - p) ** (n - x) * (1 - theta) ** n + 
                [(1 - p) ** (tx - x) * (1 - theta) ** tx - (1 - p) ** (n - x) * (1 - theta) ** n] / (1 - (1 - p) * (1 - theta)) )
        # eliminated -1 since would result in negatice ll
        return(T.sum( ll ))

with Model() as bg_model:
    p = Uniform('p', lower = 0, upper = 1)
    theta = Uniform('theta', lower = 0, upper = 1)
    like = CustomDist('like', p = p, theta = theta, observed=X.T) #likelihood

    lt = pymc3.Deterministic('lt', p / theta)
    # start = {'p':.5, 'theta':.1}
    start = pymc3.find_MAP(fmin=optimize.fmin_powell)
    step = pymc3.Slice([p, theta, lt])
    trace = pymc3.sample(5000, start = start, step = [step])

pymc3.traceplot(trace[2000:])