I have a hidden Markov model whose initial distribution, transition distribution, and observation distribution are all tfd.Categorical.
I have to clip the probs of the tfd.Categorical distributions to stay in the range 0.01 to 0.99; otherwise, when I train the model, its log likelihood quickly becomes very small.
However, the parameters then frequently end up stuck at 0.01 and 0.99. What is going wrong?
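For reference, the clipping is applied through the variable's constraint argument, roughly like this (a minimal sketch of the pattern used in the full test script below; the shape shown is for the 4x4 transition parameter):

import tensorflow as tf

# TF1-style variable whose values are projected back into [0.01, 0.99]
# after every optimizer update via the `constraint` argument.
trans_rv_param = tf.get_variable(
    "trans_rv_param",
    shape=(4, 4),
    initializer=tf.initializers.random_uniform(0, 1),
    constraint=lambda t: tf.clip_by_value(t, 0.01, 0.99))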
Results:
...
Step 436 with log likelihood: -32018.013672
...
The transition matrix is:
array([[0.01 , 0.17579712, 0.56883264, 0.9062757 ],
[0.42956257, 0.01 , 0.99 , 0.01 ],
[0.7651122 , 0.4100139 , 0.01 , 0.8126741 ],
[0.98799247, 0.7105173 , 0.09812165, 0.01 ]], dtype=float32),
...
Step 623 with log likelihood: -32013.833984
...
The transition matrix is:
array([[0.01 , 0.1704401 , 0.63210493, 0.95729625],
[0.4307539 , 0.01 , 0.99 , 0.01 ],
[0.7923921 , 0.42066112, 0.01 , 0.82306975],
[0.9821146 , 0.7217726 , 0.07453395, 0.01 ]], dtype=float32),
Once a value reaches 0.99 or 0.01, it never changes again. But the true transition matrix contains no values at 0.99 or 0.01:
array([[0.17449613, 0.1632829 , 0.32036646, 0.34185451],
[0.45132582, 0.00919458, 0.49266389, 0.04681571],
[0.21424332, 0.20170992, 0.21845484, 0.36559191],
[0.01966765, 0.58017137, 0.24100347, 0.15915751]])
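To make that concrete, here is a quick check using just the matrices printed above (the numbers are copied verbatim from the step-623 output and the true matrix; the variable names are only for illustration and this does not touch the model code):

import numpy as np

# Transition parameters reported at step 623 (copied from the output above).
learned = np.array([[0.01, 0.1704401, 0.63210493, 0.95729625],
                    [0.4307539, 0.01, 0.99, 0.01],
                    [0.7923921, 0.42066112, 0.01, 0.82306975],
                    [0.9821146, 0.7217726, 0.07453395, 0.01]], dtype=np.float32)

# True transition matrix used to generate the data (copied from above).
true_trans = np.array([[0.17449613, 0.1632829, 0.32036646, 0.34185451],
                       [0.45132582, 0.00919458, 0.49266389, 0.04681571],
                       [0.21424332, 0.20170992, 0.21845484, 0.36559191],
                       [0.01966765, 0.58017137, 0.24100347, 0.15915751]])

# Count entries sitting at the clip bounds 0.01 / 0.99.
at_bounds = np.isclose(learned, 0.01) | np.isclose(learned, 0.99)
print(at_bounds.sum())  # 6 of 16 learned entries are pinned at the bounds
print((np.isclose(true_trans, 0.01) | np.isclose(true_trans, 0.99)).sum())  # 0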
Test script:
import tensorflow as tf
import numpy as np
from tensorflow_probability import distributions as tfd
import pprint
import sys
# -------------- #
# Simulate model #
# -------------- #
def simulate_msm(init_prob, trans_mat, n_steps, dtype=np.int8):
    n_states = init_prob.size
    state_trj = np.empty(n_steps, dtype=dtype)
    state_trj[0] = np.random.choice(n_states, p=init_prob)
    for i in range(1, n_steps):
        p_now = trans_mat[state_trj[i - 1]]
        state_trj[i] = np.random.choice(n_states, p=p_now)
    return state_trj


def simulate_emis(state_trj, emis_mat, dtype=None):
    if dtype is None:
        dtype = state_trj.dtype
    n_steps = state_trj.shape[0]
    n_emis = emis_mat.shape[1]
    emis_trj = np.empty(n_steps, dtype=dtype)
    for i in range(n_steps):
        state_now = state_trj[i]
        p_now = emis_mat[state_now]
        emis_trj[i] = np.random.choice(n_emis, p=p_now)
    return emis_trj


# ---------------------------------- #
# Defining random variables of model #
# ---------------------------------- #
def make_normalized_rv(shape, param_name, rv_name, new_axis=True):
    rv_param = tf.get_variable(
        param_name, initializer=tf.initializers.random_uniform(0, 1),
        shape=shape, constraint=lambda t: tf.clip_by_value(t, 0.01, 0.99))
    if new_axis:
        b = rv_param / tf.reduce_sum(rv_param, axis=1)[:, np.newaxis]
    else:
        b = rv_param / tf.reduce_sum(rv_param)
    rv_param.assign(b)
    rv = tfd.Categorical(probs=rv_param, name=rv_name)
    return rv, rv_param


def make_trans_rv(n_hidden):
    return make_normalized_rv((n_hidden, n_hidden), "trans_rv_param",
                              "trans_rv")


def make_emis_rv(n_hidden, n_emis):
    return make_normalized_rv((n_hidden, n_emis), "emis_rv_param",
                              "emis_rv")


def make_init_rv(n_hidden):
    return make_normalized_rv((n_hidden,), "init_rv_param", "init_rv",
                              new_axis=False)


def main():
    n_hidden = 4
    n_emis = 5
    n_steps = 20000
    trj_dtype = np.int8
    # Create true parameters
    true_init_prob = np.random.uniform(0, 1, size=n_hidden)
    true_init_prob /= np.sum(true_init_prob)
    true_trans_mat = np.random.uniform(0, 1, size=(n_hidden, n_hidden))
    true_trans_mat /= np.sum(true_trans_mat, axis=1)[:, np.newaxis]
    true_emis_mat = np.random.uniform(0, 1, size=(n_hidden, n_emis))
    true_emis_mat /= np.sum(true_emis_mat, axis=1)[:, np.newaxis]
    # Simulate trajectories
    print("simulating trajectories")
    true_state_trj = simulate_msm(true_init_prob, true_trans_mat, n_steps,
                                  dtype=trj_dtype)
    emis_trj = simulate_emis(true_state_trj, true_emis_mat)
    # Create random variables
    trans_rv, trans_param = make_trans_rv(n_hidden)
    emis_rv, emis_param = make_emis_rv(n_hidden, n_emis)
    init_rv, init_param = make_init_rv(n_hidden)
    # Create hidden Markov model
    hmm = tfd.HiddenMarkovModel(
        initial_distribution=init_rv,
        transition_distribution=trans_rv,
        observation_distribution=emis_rv,
        num_steps=n_steps, validate_args=True)
    # Train
    total_log_prob = hmm.log_prob(emis_trj)
    train_op = tf.train.AdamOptimizer(0.1).minimize(-total_log_prob)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        i = 0
        while True:
            _, loss_, init_param_, trans_param_, emis_param_ = sess.run(
                [train_op, total_log_prob, init_param, trans_param,
                 emis_param])
            save_dict = {
                'true_trans_mat': true_trans_mat,
                'true_emis_mat': true_emis_mat,
                'true_init_prob': true_init_prob,
                'init_param': init_param_,
                'trans_param': trans_param_,
                'emis_param': emis_param_
            }
            print("%s: %f" % (i, loss_))
            p_str = pprint.pformat(save_dict, indent=4)
            print(p_str)
            sys.stdout.flush()
            i += 1


main()