Marginal distribution integration of a likelihood with PyMC3 and theano

Date: 2016-09-12 13:06:19

Tags: theano pymc3

I am trying to reproduce the random fatigue-limit model (Pascual & Meeker 1999; Ryan 2003, doi:10.1198/1061860032012) with PyMC3. The performance of the code I came up with is very poor:

Applied log-transform to s_v and added transformed s_v_log to model.
Applied log-transform to tau and added transformed tau_log to model.
[                  0%                  ] 2 of 100000 complete in 1.7 sec
[                  0%                  ] 3 of 100000 complete in 2.9 sec
[                  0%                  ] 4 of 100000 complete in 3.5 sec
[                  0%                  ] 5 of 100000 complete in 6.2 sec
[                  0%                  ] 6 of 100000 complete in 7.5 sec
[                  0%                  ] 7 of 100000 complete in 13.2 sec
[                  0%                  ] 8 of 100000 complete in 13.7 sec
[                  0%                  ] 9 of 100000 complete in 19.4 sec
[                  0%                  ] 10 of 100000 complete in 113.5 sec
...
[                  0%                  ] 39 of 100000 complete in 588.8 sec

I suspect the problem is the marginal integration of the fatigue lifetime over the fatigue limit (trapezoidal rule). I have left my earlier attempts in the code as comments, in case they help readability. What is the correct way to do this?

The model is log(lifetime) ~ N(beta0 + beta1*log(stress - fatigue_limit), eps). The fatigue limit is a threshold stress that determines whether the lifetime is finite at all, with log(fatigue_limit) ~ N(mu_v, s_v). In addition, observations for which the test had to be stopped at a finite log-lifetime w (so-called run-outs) require right-censoring, so for those observations my custom likelihood function uses the survival probability 1 - P(W <= w).
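Written out (in my notation, matching the code's variable names), the quantity the likelihood needs is the marginal density

$$
p(w \mid x) = \int \phi\!\left(w;\; \beta_0 + \beta_1 \log\!\left(e^{x} - e^{v}\right),\; \tau^{-1}\right)\, \phi\!\left(v;\; \mu_v,\; s_v^{2}\right)\, dv,
$$

which the code truncates to $v \in [\min(x,\, \mu_v - 6 s_v),\; \min(x,\, \mu_v + 6 s_v)]$ and approximates with the trapezoidal rule,

$$
\int_a^b f(v)\, dv \approx \frac{b-a}{N} \left( \tfrac{1}{2} f(v_0) + \sum_{i=1}^{N-1} f(v_i) + \tfrac{1}{2} f(v_N) \right).
$$

For run-outs the contribution is the survival probability $1 - P(W \le w \mid x)$, with the normal CDF marginalized over $v$ in the same way.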

Thanks!

import numpy as np
import pymc3 as pymc
import theano
import theano.tensor as T
from theano.ifelse import ifelse

data = np.array([[161.908352526, 10000000.0],
    [181.550578943, 10000000.0],
    [201.19280536, 10000000.0],
    [220.835031777, 10000000.0],
    [240.477258194, 10000000.0],
    [260.119484611, 3771909.80463],
    [279.761711028, 3031517.02602],
    [299.403937445, 246228.344425],
    [319.046163862, 164947.588452],
    [338.688390279, 57509.1400708],
    [358.330616697, 80404.6132032],
    [377.972843114, 38003.7533737],
    [397.615069531, 5875.28886189],
    [417.257295948, 1337.63562072],
    [436.899522365, 1641.72977154],
    [456.541748782, 184.309099829],
    [476.183975199, 239.35420232]])

s = data[:,0] # stresses
y = data[:,1] # lifetimes
infty = 1e7 # Run-out limit

c = np.zeros(y.shape) # Censor vector
c[y < infty] = 0 # Broken, finite lifetime
c[y >= infty] = 1 # Survived to the run-out limit, right-censored

x = np.log(s) # Logarithmic stresses
w = np.log(y) # Logarithmic lifetimes

with pymc.Model() as model:
    # Priors
    b0 = pymc.Normal('b0', 70.0, 1.0/35.0**2) # Constant parameter
    b1 = pymc.Normal('b1', -10.0, 1.0/5.0**2) # Slope parameter
    mu_v = pymc.Normal('mu_v', np.log(450.0), 1.0/np.log(0.2**2+1)) # Log-Fatigue limit mean
    s_v = pymc.Lognormal('s_v', np.log(np.sqrt(np.log(0.2**2+1)))-0.5*np.log(0.2**2+1), 1.0/np.log(0.2**2+1)) # Log-Fatigue limit standard deviation
    tau = pymc.Gamma('tau', 0.01, 0.01) # Measurement precision
    v = pymc.Normal('v', mu_v, 1.0/s_v**2) # Log-Fatigue limit

    def mu(x, b0, b1, v): # Logarithmic Random Fatigue-Limit lifetime median value
        # if x-v<=0: # Stress below fatigue limit
            # return 1e200 # Big number
        # else:
            # return b0 + b1*np.log(np.exp(x)-np.exp(v))
        # Use <= (not <) so the v == x grid endpoint does not produce log(0)
        results, updates = theano.scan(lambda vi: ifelse(T.le(x, vi), 1e200, b0 + b1*T.log(T.exp(x)-T.exp(vi))), sequences=v)
        return results

    def p_w(w, x, b0, b1, mu_v, s_v, tau, N=200):
        # Lifetime distribution
        # Integration limits
        # a = min(x, mu_v - 6*s_v)
        # b = min(x, mu_v + 6*s_v)
        a = ifelse(T.lt(mu_v-6*s_v, x), mu_v-6*s_v, x)
        b = ifelse(T.lt(mu_v+6*s_v, x), mu_v+6*s_v, x)
        dv = (b-a)/N
        # Trapezoidal quadrature
        # sum = 0.0
        # for i in range(N+1):
            ## fi = norm.pdf(w, mu(x, b0, b1, a+i*dv), 1.0/np.sqrt(tau))*norm.pdf(a+i*dv, mu_v, s_v)
            # fi = T.sqrt(tau/(2.0*np.pi))*T.exp(-tau/2.0*(w - mu(x, b0, b1, a+i*dv))**2)*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((a+i*dv - mu_v)/s_v)**2)
            # if i==0 or i==N: # End points
                # sum += 0.5*fi
            # else: # Interior
                # sum += fi
        # return sum
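        # Vectorized trapezoidal rule: evaluate the integrand on all N+1 grid points at once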
        vs = a + T.arange(N+1)*dv
        values = T.sqrt(tau/(2.0*np.pi))*T.exp(-tau/2.0*(w - mu(x, b0, b1, vs))**2)*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((vs - mu_v)/s_v)**2)
        return dv*(T.sum(values[1:-1]) + 0.5*values[0] + 0.5*values[-1])

    def p_W(w, x, b0, b1, mu_v, s_v, tau, N=200):
        # Cumulative lifetime distribution
        # Integration limits
        # a = min(x, mu_v - 6*s_v)
        # b = min(x, mu_v + 6*s_v)
        a = ifelse(T.lt(mu_v-6*s_v, x), mu_v-6*s_v, x)
        b = ifelse(T.lt(mu_v+6*s_v, x), mu_v+6*s_v, x)
        dv = (b-a)/N        
        # Trapezoidal quadrature
        # sum = 0.0
        # for i in range(N+1):
            ## fi = norm.cdf(w, mu(x, b0, b1, a+i*dv), 1.0/np.sqrt(tau))*norm.pdf(a+i*dv, mu_v, s_v)
            # fi = 0.5*(1.0 + T.erf(T.sqrt(tau/2.0)*(w - mu(x, b0, b1, a+i*dv))))*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((a+i*dv - mu_v)/s_v)**2)
            # if i==0 or i==N: # End points
                # sum += 0.5*fi
            # else: # Interior
                # sum += fi
        # return sum
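        # Same vectorized trapezoidal rule, with the normal CDF in place of the pdf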
        vs = a + T.arange(N+1)*dv
        values = 0.5*(1.0 + T.erf(T.sqrt(tau/2.0)*(w - mu(x, b0, b1, vs))))*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((vs - mu_v)/s_v)**2)
        return dv*(T.sum(values[1:-1]) + 0.5*values[0] + 0.5*values[-1])

    def Li(value):
        # Log-likelihood of observation
        # value = np.array([ci, wi, xi])
        # ci = 0 : Broken | 1 : Survived 
        # wi : log-lifetime
        # xi : log-stress
        ci = value[0]
        wi = value[1]
        xi = value[2]
        # if ci==0: # Finite lifetime
            # return np.log(p_w(wi, xi, b0, b1, mu_v, s_v, tau))
            # return T.log(p_w(wi, xi, b0, b1, mu_v, s_v, tau))
        # else: # Right-censored observation
            # return np.log(1.0-p_W(wi, xi, b0, b1, mu_v, s_v, tau))
            # return T.log(1.0-p_W(wi, xi, b0, b1, mu_v, s_v, tau))
        return ifelse(T.eq(ci, 0), T.log(p_w(wi, xi, b0, b1, mu_v, s_v, tau)), T.log(1.0-p_W(wi, xi, b0, b1, mu_v, s_v, tau)))

    def L(values):
        # Log-likelihood of observations
        # retval = 0.0
        # for i in range(values.shape[0].eval()):
            # retval += Li(values[i,:])
        # return retval
        results, updates = theano.scan(lambda i: Li(values[i]), sequences=T.arange(values.shape[0]))
        return T.sum(results)

    obs = np.vstack([c, w, x]).T # One row per observation: [censor flag, log-lifetime, log-stress]
    mylike = pymc.DensityDist('mylike', L, observed=obs)

    # mu, sds, elbo = pymc.variational.advi(n=200000) # pymc advi
    trace = pymc.sample(100000, pymc.NUTS()) # pymc NUTS
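For what it's worth, one variant I have sketched (not verified to give identical results) replaces the per-element theano.scan in mu with T.switch, so the whole quadrature grid is evaluated in one vectorized expression. The T.minimum clamp is there because T.switch evaluates both branches, and a log of a non-positive argument in the dead branch would otherwise propagate NaNs through the gradient:

def mu_switch(x, b0, b1, vs):
    # Sketch of a vectorized mu(): vs is the whole integration grid.
    # Clamp vs strictly below x before the log so the unused branch of
    # T.switch stays finite (theano evaluates both branches).
    safe_vs = T.minimum(vs, x - 1e-9)
    finite = b0 + b1*T.log(T.exp(x) - T.exp(safe_vs))
    return T.switch(T.le(x, vs), 1e200, finite)

In p_w and p_W this would be a drop-in replacement for the mu(x, b0, b1, vs) call, but I have not profiled it.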

0 Answers