So my question is: how do I normalize the variable after the gradient-descent step inside the optimizer class's _apply_dense() method? This is what I currently have.
def _apply_dense(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    var_update = state_ops.assign_sub(var, lr_t*grad)
    normalize = var.assign(tf.norm(var))
    return control_flow_ops.group(*[var_update, normalize])
My current code seems to just normalize the original variable without ever applying the gradient-descent update. I know this is because the normalization step above simply reassigns the original variable to its norm. How do I correct this so that the gradient-descent step is applied first and the result is then normalized?
Answer 0 (score: 1)
This can be achieved in the following way:
lr = 0.01
with tf.name_scope('optimizer'):
    vars_ = tf.trainable_variables()
    grads = tf.gradients(loss_tensor, vars_)
    assign_ops = [tf.assign(v, (v - lr*g)) for g, v in zip(grads, vars_)]
    with tf.control_dependencies(assign_ops):
        vars_norms = [tf.sqrt(2*tf.nn.l2_loss(v)) for v in vars_]
        # next line prevents division by zero
        vars_norms = [tf.clip_by_value(n, 0.00001, np.inf) for n in vars_norms]
        update_ops = [tf.assign(v, v/n) for v, n in zip(vars_, vars_norms)]
        update_op = tf.group(update_ops)
Note that I added tf.clip_by_value() to prevent division by zero.
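If you would rather keep the normalization inside the custom optimizer, as the question asks, the same idea can be expressed in _apply_dense() itself. This is a minimal sketch, not part of the original answer, assuming the usual math_ops, state_ops, and control_flow_ops imports from tensorflow.python.ops:

def _apply_dense(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    # gradient-descent step: var <- var - lr * grad
    var_update = state_ops.assign_sub(var, lr_t*grad)
    # the control dependency forces the reads of var below to happen after the update
    with tf.control_dependencies([var_update]):
        # guard against division by zero, as in the answer above
        norm = tf.maximum(tf.norm(var), 0.00001)
        # divide the already-updated variable by its norm instead of overwriting it with the norm
        normalize = state_ops.assign(var, var/norm)
    return control_flow_ops.group(var_update, normalize)

The two changes compared to the question's version are the control dependency, which fixes the ordering, and the second assign, which divides by the norm rather than replacing the variable with the scalar norm.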
Here is a complete usage example of the graph-level approach:
import tensorflow as tf
import numpy as np

x = tf.placeholder(tf.float32, shape=(None, 2))
y = tf.placeholder(tf.int32, shape=(None))
logits = tf.layers.dense(x, 2)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=y, logits=logits)
loss_tensor = tf.reduce_mean(xentropy)

lr = 0.01
with tf.name_scope('optimizer'):
    vars_ = tf.trainable_variables()
    grads = tf.gradients(loss_tensor, vars_)
    assign_ops = [tf.assign(v, (v - lr*g)) for g, v in zip(grads, vars_)]
    with tf.control_dependencies(assign_ops):
        vars_norms = [tf.sqrt(2*tf.nn.l2_loss(v)) for v in vars_]
        # next line prevents division by zero
        vars_norms = [tf.clip_by_value(n, 0.00001, np.inf) for n in vars_norms]
        update_ops = [tf.assign(v, v/n) for v, n in zip(vars_, vars_norms)]
        update_op = tf.group(update_ops)

# dummy data for illustration
x_train = np.random.normal(size=(10, 2))
x_train = np.vstack([x_train, 2*np.random.normal(size=(10, 2))])
y_train = [0 for _ in range(10)] + [1 for _ in range(10)]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        loss, _ = sess.run([loss_tensor, update_op], feed_dict={x: x_train, y: y_train})
        print(loss)
# 0.7111398
# 0.7172677
# 0.71517026
# 0.713101
# 0.71105987
# 0.7090467
# 0.70706147
# 0.7051038
# 0.7031738
# 0.7012712
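As a quick sanity check, not part of the original answer, you can verify inside the same session block that each trainable variable ends up with (approximately) unit L2 norm after an update step:

# build the norm tensors once, reusing vars_ from the example above
norm_tensors = [tf.sqrt(2*tf.nn.l2_loss(v)) for v in vars_]
print(sess.run(norm_tensors))  # each printed value should be close to 1.0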