我试图将2个向量的值组合起来,并学习其中一个向量的潜在概率分布。然而,当我尝试与DensityDist一起使用' obs'变量I没有恢复注入的分布。我仍然试图理解PyMC,所以这可能是我的误解,但我现在还是相当困难。下面的代码显示了我构建的数据和模型。我认为问题是我使用的lambda函数试图产生结果的对数概率。
%matplotlib inline
import numpy as np
from pymc3 import *
import pymc3 as pm
import pandas as pd
print 'pymc3 version: ' + pm.__version__
#INJECTED DISTRIBUTION OF PROBABILITIES
true_probs = [0.3, 0.6, 0.1]
k = len(true_probs)
y = np.array([[ 8.33875891e-01, 1.49760167e-02, 1.51148092e-01],
[ 1.36399585e-01, 7.30337980e-01, 1.33262435e-01],
[ 4.97603830e-05, 9.94778208e-01, 5.17203171e-03],
[ 5.00358612e-01, 4.84967429e-01, 1.46739594e-02],
[ 9.55022216e-01, 8.09821124e-04, 4.41679631e-02],
[ 8.43878730e-01, 3.70989423e-02, 1.19022328e-01],
[ 2.51328518e-02, 9.09905536e-01, 6.49616117e-02],
[ 8.98816908e-01, 9.33910096e-02, 7.79208227e-03],
[ 1.41376589e-01, 1.88822492e-02, 8.39741162e-01],
[ 4.97743086e-06, 9.42248660e-01, 5.77463628e-02],
[ 1.56530473e-02, 9.80386023e-01, 3.96092994e-03],
[ 5.08725850e-02, 8.43604796e-01, 1.05522619e-01],
[ 2.30447406e-02, 9.76817732e-01, 1.37527223e-04],
[ 2.61205153e-01, 7.19243432e-01, 1.95514147e-02],
[ 1.93474892e-03, 8.43538791e-01, 1.54526460e-01],
[ 9.95086044e-01, 5.04045287e-08, 4.91390547e-03],
[ 7.20595493e-04, 9.97778427e-01, 1.50097771e-03],
[ 8.42073425e-01, 2.01386086e-02, 1.37787966e-01],
[ 9.97577668e-01, 2.06724434e-03, 3.55087319e-04],
[ 4.95181598e-01, 4.42076834e-01, 6.27415684e-02]])
sample_size = y.shape[0]
#ROUGH SUM OF THE ORIGINAL DATA
print y.sum(axis=0)
with pm.Model() as multinom_test:
#DISTRIBUTION OF PARAMS I WANT TO LEARN
params = pm.Dirichlet('a', a=np.ones(k))
#DATA WITH A WEIGHTING APPLIED TO EACH DATAPOINT
data_weighting = abs(np.ones(sample_size)+np.random.normal(0, 1, sample_size))
#PROBLEM IS HERE: TRYING TO COMBINE THESE PROBABILITIES BY MULTIPLYING THE VECTORS AND SUMMING THE THE JOINT PROB
obs = DensityDist('observations',lambda value: -1*np.log(np.outer(params, data_weighting).sum()), observed=y)
trace = pm.sample(5000, pm.Metropolis())
print trace[probs].mean(0)
结果始终将所有概率放在最后一个词中而不是正确的参数
pymc3 version: 3.0.rc2
y.sum(axis=0):
[ 8.01826573 10.05304779 1.92868648]
trace[probs].mean(0)
[ 0.03199909 0.04873978 0.91926114]