Question

我正在尝试将pymc3与从某些观察到的数据派生的似然函数一起使用。这些观察到的数据不适合任何标准的标准分布，因此我想根据这些观察来定义自己的数据。

一种方法是对观察值使用核密度估计。这在pymc2中是可能的，但不能与pymc3中的Theano变量很好地配合。

在下面的代码中，我只是生成一些正态分布的伪数据。和我以前一样，我基本上是为观察结果分配一个均匀的分布。

这是我的代码：

from scipy import stats
import numpy as np
import pymc3 as pm
from sklearn.neighbors.kde import KernelDensity

data = np.sort(stats.norm.rvs(loc=0, scale=1, size=1000))
kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(data.reshape(-1, 1))

def get_log_likelihood(x):
    return kde.score_samples(x)

with pm.Model() as test_model:
    x = pm.Uniform('prior rv', lower=-10, upper=10)
    obs = pm.DensityDist('observed likelihood', get_log_likelihood, observed={'x': x})

    step = pm.Metropolis()
    trace = pm.sample(200, step=step)

我收到的错误似乎是kde score_samples函数爆炸，因为它期望一个数组，但是x是Theano变量。

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-49-4efbbe7376dc> in <module>()
      1 with pm.Model() as test_model:
      2     x = pm.Uniform('prior rv', lower=0.0, upper=1e6)
----> 3     obs = pm.DensityDist('observed likelihood', get_log_likelihood, observed={'x': x})
      4 
      5     step = pm.Metropolis()

~/research_notebooks/venv/lib/python3.6/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
     40             total_size = kwargs.pop('total_size', None)
     41             dist = cls.dist(*args, **kwargs)
---> 42             return model.Var(name, dist, data, total_size)
     43         else:
     44             raise TypeError("Name needs to be a string but got: {}".format(name))

~/research_notebooks/venv/lib/python3.6/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size)
    825             with self:
    826                 var = MultiObservedRV(name=name, data=data, distribution=dist,
--> 827                                       total_size=total_size, model=self)
    828             self.observed_RVs.append(var)
    829             if var.missing_values:

~/research_notebooks/venv/lib/python3.6/site-packages/pymc3/model.py in __init__(self, name, data, distribution, total_size, model)
   1372         self.missing_values = [datum.missing_values for datum in self.data.values()
   1373                                if datum.missing_values is not None]
-> 1374         self.logp_elemwiset = distribution.logp(**self.data)
   1375         # The logp might need scaling in minibatches.
   1376         # This is done in `Factor`.

<ipython-input-48-535f58ce543b> in get_log_likelihood(x)
      1 def get_log_likelihood(x):
----> 2     return kde.score_samples(x)

~/research_notebooks/venv/lib/python3.6/site-packages/sklearn/neighbors/kde.py in score_samples(self, X)
    150         # For it to be a probability, we must scale it.  For this reason
    151         # we'll also scale atol.
--> 152         X = check_array(X, order='C', dtype=DTYPE)
    153         N = self.tree_.data.shape[0]
    154         atol_N = self.atol * N

~/research_notebooks/venv/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    431                                       force_all_finite)
    432     else:
--> 433         array = np.array(array, dtype=dtype, order=order, copy=copy)
    434 
    435         if ensure_2d:

ValueError: setting an array element with a sequence.

任何帮助将不胜感激。谢谢！

pymc3具有来自内核密度估计的自定义似然函数

0 个答案: