I am new to variational inference and am reading the paper "Variational Inference: A Review for Statisticians" (https://arxiv.org/abs/1601.00670). It contains a simple example of applying VI to a mixture of Gaussians, and I am trying to implement it with TensorFlow Probability. I cannot figure out which part is wrong; the problem could be in my understanding of the theory or of TF Probability.
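For reference, the objective I am trying to maximize is the ELBO from the paper, with a mean-field family over the cluster means mu and the assignments c:

ELBO(q) = E_q[ log p(x, mu, c) ] - E_q[ log q(mu, c) ],   where q(mu, c) = q(mu) q(c),

which is what the `elbo = energy + entropy` computation below is meant to estimate.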
Using the TF Probability examples (e.g., probabilistic PCA) as a reference, I have written the following code:
#imports
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import edward2 as ed
# Generative model
def GMM(N, K, data_dim, stddv, pi):
    mu = ed.Normal(loc=tf.zeros(data_dim), scale=stddv, sample_shape=K, name='mu')  # cluster means, shape [K, data_dim]
    c = ed.Categorical(probs=pi, sample_shape=N, name='c')  # cluster assignments, shape [N]
    x = ed.Normal(loc=tf.gather(mu, indices=c), scale=1., name='x')  # observations, shape [N, data_dim]
    return x, (mu, c)
stddv = 4.
K = 3
N = 500  # must be an integer (used as a sample shape and an array dimension)
data_dim = 2
pi = np.ones(K, dtype=np.float32)*(1./K)
model = GMM(N=N, K=K, data_dim=data_dim, stddv=stddv, pi=pi)
# Generating sample data using the model
with tf.Session() as sess:
    x_train, (actual_means, actual_cluster_assignments) = sess.run(model)
print("Actual Means:")
print(actual_means)
# Visualize data
plt.scatter(x_train[:, 0], x_train[:, 1], color='blue', alpha=0.5)
plt.axis([-10, 10, -10, 10])
plt.title("Data set")
plt.show()
# VARIATIONAL INFERENCE
tf.reset_default_graph()
log_joint = ed.make_log_joint_fn(GMM)
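# make_log_joint_fn returns a function that evaluates the model's log joint
# density log p(x, mu, c) at given values of its named random variables.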
def target(mu, c):
    return log_joint(N=N, K=K, data_dim=data_dim, stddv=stddv, pi=pi,
                     mu=mu, c=c, x=x_train)
# variational distribution
def variational_model(q_mean, q_stddv, q_phi):
    q_mu = ed.Normal(loc=q_mean, scale=q_stddv, name='q_mu')
    q_c = ed.Categorical(probs=q_phi, name='q_c')
    return q_mu, q_c
# variational parameters
q_mean = tf.Variable(tf.random_normal([K, data_dim], stddev=0.5), dtype=tf.float32)
q_stddv = tf.nn.softplus(4 * tf.Variable(tf.random_normal([K, data_dim], stddev=0.5), dtype=tf.float32))
q_phi = tf.nn.softmax(tf.Variable(tf.random_normal([N, K], stddev=0.5), dtype=tf.float32))
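# (softplus keeps q_stddv positive; softmax makes each row of q_phi a valid
# probability vector over the K clusters)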
log_q = ed.make_log_joint_fn(variational_model)
def target_q(q_mu, q_c):
    return log_q(q_mean=q_mean, q_stddv=q_stddv, q_phi=q_phi,
                 q_mu=q_mu, q_c=q_c)
q_mu, q_c = variational_model(q_mean=q_mean, q_stddv=q_stddv, q_phi=q_phi) # variational model
entropy = -target_q(q_mu, q_c) # -E[log q(z)]
energy = target(q_mu, q_c) # E[log p(x, z)]
elbo = energy + entropy
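# Note: energy and entropy above are single-sample Monte Carlo estimates,
# evaluated at the one draw (q_mu, q_c) from the variational model.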
optimizer = tf.train.AdamOptimizer(learning_rate=0.05)
train = optimizer.minimize(-elbo)  # maximize the ELBO
init = tf.global_variables_initializer()
t = []
num_epochs = 1000
with tf.Session() as sess:
    sess.run(init)
    for i in range(num_epochs):
        sess.run(train)
        if i % 5 == 0:
            t.append(sess.run(elbo))
    mean_inferred = sess.run(q_mean)
    stddv_inferred = sess.run(q_stddv)
    phi_inferred = sess.run(q_phi)
plt.plot(range(0, num_epochs, 5), t)
plt.show()
print("Inferred mean:")
print(mean_inferred)
print("Standard Deviation:")
print(stddv_inferred)
print("Phi:")
print(phi_inferred)
The inferred values are nowhere near the actual ones. When I generate data using the inferred values, all of the points fall into a single cluster, centered at the mean of the K original clusters.
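To clarify what I mean by generating data from the inferred values, it is essentially this (a minimal sketch rather than my exact snippet; it assumes mean_inferred, K, N, and data_dim from the run above):

# Hypothetical sketch: resample data using the inferred cluster means.
new_c = np.random.randint(K, size=N)                          # uniform assignments, since pi is uniform
new_x = mean_inferred[new_c] + np.random.randn(N, data_dim)   # unit-variance noise around inferred means
plt.scatter(new_x[:, 0], new_x[:, 1], color='red', alpha=0.5)
plt.axis([-10, 10, -10, 10])
plt.show()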
Update: I have even tried the 1D case.
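Concretely, the 1D attempt only changes the dimensionality (a sketch; everything else in the script above stays the same):

# 1D variant: identical model and inference code, just data_dim = 1
data_dim = 1
model = GMM(N=N, K=K, data_dim=data_dim, stddv=stddv, pi=pi)
# q_mean and q_stddv then have shape [K, 1]; q_phi stays [N, K].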