我正在尝试使用PyMC3重现随机疲劳极限模型(Pascual& Meeker 1999,Ryan 2003 doi:10.1198 / 1061860032012)。我提出的代码的性能非常糟糕:
Applied log-transform to s_v and added transformed s_v_log to model.
Applied log-transform to tau and added transformed tau_log to model.
[ 0% ] 2 of 100000 complete in 1.7 sec
[ 0% ] 3 of 100000 complete in 2.9 sec
[ 0% ] 4 of 100000 complete in 3.5 sec
[ 0% ] 5 of 100000 complete in 6.2 sec
[ 0% ] 6 of 100000 complete in 7.5 sec
[ 0% ] 7 of 100000 complete in 13.2 sec
[ 0% ] 8 of 100000 complete in 13.7 sec
[ 0% ] 9 of 100000 complete in 19.4 sec
[ 0% ] 10 of 100000 complete in 113.5 sec
...
[ 0% ] 39 of 100000 complete in 588.8 sec
我怀疑问题在于疲劳寿命的边际积分与疲劳极限(梯形法则)有关。我离开了之前尝试运行代码的尝试,也评论说可能有助于提高可读性。这样做的正确方法是什么?
模型是log(寿命)~N(beta0 + beta1 * log(stress - fatigue_limit),eps)。疲劳极限是一种极限应力,它定义寿命是否有限。 log(fatigue_limit)~N(mu_v,s_v)。此外,对于必须在有限对数寿命w(称为跳动)时停止测试的观测需要右审查,因此在我的自定义似然函数中使用集总似然1-P(W <= w)用于那些观察
谢谢!
import numpy as np
import pymc3 as pymc
import theano
import theano.tensor as T
from theano.ifelse import ifelse
data = np.array([[161.908352526, 10000000.0],
[181.550578943, 10000000.0],
[201.19280536, 10000000.0],
[220.835031777, 10000000.0],
[240.477258194, 10000000.0],
[260.119484611, 3771909.80463],
[279.761711028, 3031517.02602],
[299.403937445, 246228.344425],
[319.046163862, 164947.588452],
[338.688390279, 57509.1400708],
[358.330616697, 80404.6132032],
[377.972843114, 38003.7533737],
[397.615069531, 5875.28886189],
[417.257295948, 1337.63562072],
[436.899522365, 1641.72977154],
[456.541748782, 184.309099829],
[476.183975199, 239.35420232]])
s = data[:,0] # stresses
y = data[:,1] # lifetimes
infty = 1e7 # Run-out limit
c = np.zeros(y.shape) # Censor vector
c[y <= infty] = 0 # Broken, finite lifetime
c[y > infty] = 1 # Survived, right-censor
x = np.log(s) # Logarithmic stresses
w = np.log(y) # Logarithmic lifetimes
with pymc.Model() as model:
# Priors
b0 = pymc.Normal('b0', 70.0, 1.0/35.0**2) # Constant parameter
b1 = pymc.Normal('b1', -10.0, 1.0/5.0**2) # Slope parameter
mu_v = pymc.Normal('mu_v', np.log(450.0), 1.0/np.log(0.2**2+1)) # Log-Fatigue limit mean
s_v = pymc.Lognormal('s_v', np.log(np.sqrt(np.log(0.2**2+1)))-0.5*np.log(0.2**2+1), 1.0/np.log(0.2**2+1)) # Log-Fatigue limit standard deviation
tau = pymc.Gamma('tau', 0.01, 0.01) # Measurement precision
v = pymc.Normal('v', mu_v, 1.0/s_v**2) # Log-Fatigue limit
def mu(x, b0, b1, v): # Logarithmic Random Fatigue-Limit lifetime median value
# if x-v<=0: # Stress below fatigue limit
# return 1e200 # Big number
# else:
# return b0 + b1*np.log(np.exp(x)-np.exp(v))
results, updates = theano.scan(lambda vi: ifelse(T.lt(x, vi), 1e200, b0 + b1*T.log(T.exp(x)-T.exp(vi))), sequences=v)
return results
def p_w(w, x, b0, b1, mu_v, s_v, tau, N=200):
# Lifetime distribution
# Integration limits
# a = min(x, mu_v - 6*s_v)
# b = min(x, mu_v + 6*s_v)
a = ifelse(T.lt(mu_v-6*s_v, x), mu_v-6*s_v, x)
b = ifelse(T.lt(mu_v+6*s_v, x), mu_v+6*s_v, x)
dv = (b-a)/N
# Trapezoidal quadrature
# sum = 0.0
# for i in range(N+1):
## fi = norm.pdf(w, mu(x, b0, b1, a+i*dv), 1.0/np.sqrt(tau))*norm.pdf(a+i*dv, mu_v, s_v)
# fi = T.sqrt(tau/(2.0*np.pi))*T.exp(-tau/2.0*(w - mu(x, b0, b1, a+i*dv))**2)*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((a+i*dv - mu_v)/s_v)**2)
# if i==0 or i==N: # End points
# sum += 0.5*fi
# else: # Interior
# sum += fi
# return sum
vs = a + T.arange(N+1)*dv
values = T.sqrt(tau/(2.0*np.pi))*T.exp(-tau/2.0*(w - mu(x, b0, b1, vs))**2)*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((vs - mu_v)/s_v)**2)
return dv*(T.sum(values[1:-1]) + 0.5*values[0] + 0.5*values[-1])
def p_W(w, x, b0, b1, mu_v, s_v, tau, N=200):
# Cumulative lifetime distribution
# Integration limits
# a = min(x, mu_v - 6*s_v)
# b = min(x, mu_v + 6*s_v)
a = ifelse(T.lt(mu_v-6*s_v, x), mu_v-6*s_v, x)
b = ifelse(T.lt(mu_v+6*s_v, x), mu_v+6*s_v, x)
dv = (b-a)/N
# Trapezoidal quadrature
# sum = 0.0
# for i in range(N+1):
## fi = norm.cdf(w, mu(x, b0, b1, a+i*dv), 1.0/np.sqrt(tau))*norm.pdf(a+i*dv, mu_v, s_v)
# fi = 0.5*(1.0 + T.erf(T.sqrt(tau/2.0)*(w - mu(x, b0, b1, a+i*dv))))*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((a+i*dv - mu_v)/s_v)**2)
# if i==0 or i==N: # End points
# sum += 0.5*fi
# else: # Interior
# sum += fi
# return sum
vs = a + T.arange(N+1)*dv
values = 0.5*(1.0 + T.erf(T.sqrt(tau/2.0)*(w - mu(x, b0, b1, vs))))*T.sqrt(1.0/(2.0*np.pi))/s_v*T.exp(-0.5*((vs - mu_v)/s_v)**2)
return dv*(T.sum(values[1:-1]) + 0.5*values[0] + 0.5*values[-1])
def Li(value):
# Log-likelihood of observation
# value = np.array([ci, wi, xi])
# ci = 0 : Broken | 1 : Survived
# wi : log-lifetime
# xi : log-stress
ci = value[0]
wi = value[1]
xi = value[2]
# if ci==0: # Finite lifetime
# return np.log(p_w(wi, xi, b0, b1, mu_v, s_v, tau))
# return T.log(p_w(wi, xi, b0, b1, mu_v, s_v, tau))
# else: # Right-censored observation
# return np.log(1.0-p_W(wi, xi, b0, b1, mu_v, s_v, tau))
# return T.log(1.0-p_W(wi, xi, b0, b1, mu_v, s_v, tau))
return ifelse(T.eq(ci, 0), T.log(p_w(wi, xi, b0, b1, mu_v, s_v, tau)), T.log(1.0-p_W(wi, xi, b0, b1, mu_v, s_v, tau)))
def L(values):
# Log-likelihood of observations
# retval = 0.0
# for i in range(values.shape[0].eval()):
# retval += Li(values[i,:])
# return retval
results, updates = theano.scan(lambda i: Li(values[i]), sequences=T.arange(values.shape[0]))
return T.sum(results)
data = np.vstack([c,w,x]).T
mylike = pymc.DensityDist('mylike', L, observed=data)
# mu, sds, elbo = pymc.variational.advi(n=200000) # pymc advi
trace = pymc.sample(100000, pymc.NUTS()) # pymc NUTS