如何使用Python的pymcmcstat库计算贝叶斯模型选择

时间:2020-05-29 09:32:39

标签: python model selection bayesian mcmc

我迷失在python的pymcmcstat文档中。我设法绘制了参数分布等图,但是当涉及贝叶斯因子时,我需要为每个模型计算似然性参数空间上的积分。

我关注了this video。每个模型都具有带有不同参数的不同模型功能。根据{{​​3}},我应该比较模型证据以进行模型选择。我所拥有的只是在burnin之后的链结果,该链结果返回每个参数的分布,平方和误差(SSE)和方差的链。如何将模型与我拥有的mcmc chain结果进行比较?

我从这里去哪里?

这是我对一个模型的代码;对于每个模型,都会更改test_modelfun并保存链结果,以便进一步比较不同模型;

# Data related lines: input omega and output fm
x = (np.array([76.29395, 152.5879, 305.1758, 610.3516, 1220.703,     2441.406, 4882.813, 9765.625, 19531.25, 39062.5, 78125, 156250, 312500, 625000]))
y = np.array([155.6412886 -63.3826188j , 113.9114436 -79.90544719j,  64.97809441-77.65152741j,  26.87482243-57.38474656j,    7.44462341-34.02438426j, 2.32954856-16.17918216j,    2.30747953 -6.72487436j,   3.39658859 -2.72444011j,    4.0084345  -1.2029167j ,   4.25877486 -0.70276446j, 4.11761329 -0.69591231j,    3.83339489 -0.65244854j,    3.47289164 -0.6079278j ,  3.07027319 -0.14914359j])
#import mcmc library and add data to the library in the second line below
mcstat = MCMC()
mcstat.data.add_data_set(x,y)
##define transfer function model calculated with theta parameters
def test_modelfun(xdata, theta):

    K, alpha_0, alpha_1, Tp_1, Tp_2, Tz_1 = 10**theta[0], 10**theta[1], 10**theta[2], 10**theta[3], 10**theta[4], 10**theta[5]
    #####################
    Pz_0 = (omega**(alpha_0))
    Pz_1 = (np.sqrt(((Tp_1**2)*(omega**(2*alpha_1))) + (2*Tp_1*(omega**alpha_1)*cos(alpha_1*pi/2)) +1))
    Pz_2 = (np.sqrt(((Tp_2**2)*(omega**(2*alpha_1))) + (2*Tp_2*(omega**alpha_1)*cos(alpha_1*pi/2)) +1))
    Zz_1 = (np.sqrt(((Tz_1**2)*(omega**(2*alpha_1))) + (2*Tz_1*(omega**alpha_1)*cos(alpha_1*pi/2)) +1))
    Pp_0 = np.array([(-1*pi*alpha_0)/2]*len(omega)).T#[0]
    Pp_1 = np.array([math.atan((Tp_1*(omega[i]**alpha_1)*sin(pi*alpha_1/2))/(1+(Tp_1*(omega[i]**alpha_1)*cos(pi*alpha_1/2)))) for i in range(len(omega))])
    Pp_2 = np.array([math.atan((Tp_2*(omega[i]**alpha_1)*sin(pi*alpha_1/2))/(1+(Tp_2*(omega[i]**alpha_1)*cos(pi*alpha_1/2)))) for i in range(len(omega))])
    Zp_1 = np.array([math.atan((Tz_1*(omega[i]**alpha_1)*sin(pi*alpha_1/2))/(1+(Tz_1*(omega[i]**alpha_1)*cos(pi*alpha_1/2)))) for i in range(len(omega))])
    #####################
    Z_est = (K*Zz_1)/(Pz_0*Pz_1*Pz_2)
    P_est = Zp_1 + Pp_0 - Pp_1 - Pp_2
    #####################
    R_est  = np.real([cmath.rect(Z_est[i], P_est[i]) for i in range(len(omega))])#abs()#[:,0]
    X_est  = np.imag([cmath.rect(Z_est[i], P_est[i]) for i in range(len(omega))])#abs()#[:,0]
    RX_est = (R_est + 1j*X_est)
    return RX_est
def modelfun(xdata, theta):
    ymodel = test_modelfun(xdata,theta)
    Zest = 20*log10(np.abs(ymodel))
    return Zest

##define sum of squares function for the error in evaluating the likelihood function L(Fobs(i)|q)
def test_ssfun(theta,data):
    xdata = data.xdata[0]
    ydata = data.ydata[0]
    ymodel = test_modelfun(xdata,theta)
    return (1/len(omega))*(sum((real(fm)- real(ymodel))**2 + (imag(fm)-imag(ymodel))**2))
    #sumsquares = sum((ymodel[:,0]-ydata[:,0])**2)
##import mcmc library and add data to the library in the second line below
itr = 50.0e4
verb = 1
wbar = 1
mcstat = MCMC()
mcstat.data.add_data_set(x,y)
## add model parameters
mcstat.parameters.add_model_parameter(name='th_1',theta0=1, minimum=-2,maximum=3) #m_k, M_k = -2, 3
mcstat.parameters.add_model_parameter(name='th_2',theta0=-1, minimum=-4,maximum=0) #m_a0, M_a0 = -4, 0
mcstat.parameters.add_model_parameter(name='th_3',theta0=-1, minimum=-3,maximum=0) #m_a1, M_a1 = -3, 0
mcstat.parameters.add_model_parameter(name='th_4',theta0=-4, minimum=-9,maximum=0) #m_p1, M_p1 = -9, 0
mcstat.parameters.add_model_parameter(name='th_5',theta0=-4, minimum=-9,maximum=0) #m_p2, M_p2 = -9, 0
mcstat.parameters.add_model_parameter(name='th_6',theta0=-4, minimum=-9,maximum=0) #m_z1, M_z1 = -9, 0
## define simulation options: mh=metropolis-hastings, am=adaptive  metropolis, dr=delayed rejection, dram=dr+am
mcstat.simulation_options.define_simulation_options(nsimu=int(itr), updatesigma=1, method='dr', adaptint=100, verbosity=verb, waitbar=wbar)
## define model settings
mcstat.model_settings.define_model_settings(sos_function=test_ssfun)
mcstat.run_simulation()
## extract results
results=mcstat.simulation_results.results
chain = results['chain']# chain for each parameter sampled during  simulation. s2
s2chain = results['s2chain']# chain for error variances. if      updatesigma=0 then s2chain is an empty list
sschain = results['sschain']# chain for sum-of-squares error calculated using each set of parameter values in the cahin
names = results['names']
burnin = int(itr/2)
## display chain statistics
mcstat.chainstats(chain[burnin:,:],results)
mcpl = mcstat.mcmcplot
figcp = mcpl.plot_chain_panel(chain, names, figsizeinches = (7,6))
axes = figcp.get_axes()
for ii, ax in enumerate(axes):
    ch = chain[:, ii]
    ax.plot([burnin, burnin], [ch.min(), ch.max()], 'r')
figpd = mcpl.plot_density_panel(chain[burnin:,:], names, figsizeinches=(7,6))
figpc = mcpl.plot_pairwise_correlation_panel(chain[burnin:,:], names, figsizeinches = (7,6))
mcstat.PI.setup_prediction_interval_calculation(results=results,     data=mcstat.data, modelfunction=modelfun, burnin=burnin)
mcstat.PI.generate_prediction_intervals(calc_pred_int=True, waitbar=False)
fg, ax = mcstat.PI.plot_prediction_intervals(adddata=True,  plot_pred_int=True, figsizeinches = (7,5), data_display=dict(color='k'))

0 个答案:

没有答案