因此,对于高斯过程的参数,我有以下MCMC代码
代码:
walkers = 250
pos_min = np.concatenate((np.array([self.initial_scale, self.initial_noise]), np.zeros(self.ndim)))
pos_max = np.concatenate((np.array([self.initial_scale, self.initial_noise]), 2.0*np.ones(self.ndim)))
psize = pos_max - pos_min
pos = [pos_min + psize*np.random.rand(self.ndim+2) for i in range(nwalkers)]
with Pool(4) as pool:
sampler = emcee.EnsembleSampler(nwalkers, self.ndim+2, self.lnprob, pool=pool)
pos, prob, state = sampler.run_mcmc(pos, 200)
sampler.reset()
sampler.run_mcmc(pos, 300)
samples = sampler.flatchain[-500:,:]
return samples
函数 self.lnprob 在以下代码段中定义:
代码
def lnprior(theta):
    """Log-prior for the GP hyperparameters.

    theta layout: [s2_f, s2_n, l_1, ..., l_d]. Support: both variances
    strictly positive and every length scale in (0, 2). Outside the
    support, returns -inf.
    """
    signal_var = theta[0]
    noise_var = theta[1]
    length_scales = theta[2:]
    in_support = (
        signal_var > 0
        and noise_var > 0
        and np.all(length_scales > 0)
        and np.all(length_scales < 2.0)
    )
    if not in_support:
        return -np.inf
    # Prior density: a term penalising small noise variances plus a
    # log-normal-style penalty on the signal variance.
    noise_term = np.log(np.log(1 + (0.1 / noise_var) ** 2))
    signal_term = -0.5 * (np.log(np.sqrt(signal_var)) / 1.0) ** 2
    return noise_term + signal_term
def lnlike(theta):
    """GP log marginal likelihood for hyperparameters *theta*.

    theta layout: [s2_f, s2_n, l_1, ..., l_d]. Writes the sampled values
    into the kernel held on ``self`` (captured from the enclosing scope),
    then evaluates the standard Gaussian-process marginal likelihood via
    a Cholesky factorisation of K + alpha*I.
    """
    signal_var = theta[0]
    noise_var = theta[1]
    length_scales = theta[2:]
    # Push the sampled hyperparameters into the composite kernel.
    # Presumably the kernel is Constant * RBF + WhiteKernel (inferred from
    # the attribute paths) — confirm against the kernel construction.
    self.kernel.k1.k1.constant_value = signal_var
    self.kernel.k1.k2.length_scale = length_scales
    self.kernel.k2.noise_level = noise_var
    gram = self.kernel(self.Xi)
    gram[np.diag_indices_from(gram)] += self.alpha  # diagonal jitter for stability
    chol = cholesky(gram, lower=True)
    # Support multi-dimensional output: treat 1-D targets as one column.
    targets = self.Yi
    if targets.ndim == 1:
        targets = targets[:, np.newaxis]
    weights = cho_solve((chol, True), targets)  # K^-1 y via the factor
    # log p(y|X) = -1/2 y^T K^-1 y - 1/2 log|K| - n/2 log(2*pi), per output dim
    per_dim = -0.5 * np.einsum("ik,ik->k", targets, weights)
    per_dim -= np.log(np.diag(chol)).sum()
    per_dim -= gram.shape[0] / 2 * np.log(2 * np.pi)
    return per_dim.sum(-1)
# Tail of the enclosing lnprob(theta) (its `def` line is above this excerpt):
# combine prior and likelihood, short-circuiting when the prior is -inf so
# the expensive Cholesky factorisation in lnlike is skipped for samples
# outside the prior's support.
lp = lnprior(theta)
if not np.isfinite(lp):
return -np.inf
return lp + lnlike(theta)
这段代码在我的笔记本电脑(第 7 代 Intel Core i5)上运行大约需要 30 秒。theta 数组的大小为 4,self.Xi 的大小为 2。
相同的代码在具有 64 核的 Intel Xeon Phi 处理器上却需要超过 400 秒。
此外,我注意到随着 multiprocessing 池中进程数量的增加,运行时间似乎也在增加。
在英特尔至强phi处理器上花费这么多时间是否有任何原因?我该怎么做才能在Intel Xeon phi处理器上加快速度?