给出以下示例代码(使用):
# Generate random data
x_train = np.random.rand(64, 16, 16, 8)
y_train = np.random.randint(0, 5, 64)
one_hot = np.zeros((len(y_train), 5))
one_hot[list(np.indices((len(y_train),))) + [y_train]] = 1
y_train = one_hot
# Model definition
class FeedForward(object):
def __init__(self):
self.x = tf.placeholder(tf.float32, shape=[None, 16, 16, 8], name="input_x")
self.y = tf.placeholder(tf.float32, [None, 5], name="input_y")
with tf.name_scope("conv1"):
kernel_shape=[3, 3, 8, 8]
w = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1), name="weight")
conv1 = tf.nn.conv2d(self.x, w, strides=[1, 1, 1, 1], padding="SAME", name="conv")
with tf.name_scope("conv2"):
kernel_shape=[3, 3, 8, 4]
w = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1), name="weight")
conv2 = tf.nn.conv2d(conv1, w, strides=[1, 1, 1, 1], padding="SAME", name="conv")
out = tf.contrib.layers.flatten(conv2)
with tf.name_scope("output"):
kernel_shape=[out.get_shape()[1].value, 5]
w = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1), name="weight")
self.scores = tf.matmul(out, w)
predictions = tf.argmax(self.scores, axis=1, name="predictions")
self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.y))
correct_predictions = tf.equal(predictions, tf.argmax(self.y, axis=1))
self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
我希望执行自定义权重更新步骤,即除了每次迭代的权重更新外,我想从我的权重参数中减去一些固定值,如下所示:
with tf.Graph().as_default():
session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
sess = tf.Session(config=session_conf)
with sess.as_default():
ffn = FeedForward()
global_step = tf.Variable(0, name="global_step", trainable=False)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-2)
grads_and_vars = optimizer.compute_gradients(ffn.loss)
updated_gv = []
for g, w in grads_and_vars:
# perform update on weights aside from ouput weights
if ("weight" in w.name) and ("output" not in w.name):
# some weight update
w_update = tf.Variable.assign(w, w - tf.constant(1.0, shape=w.get_shape()))
updated_gv.append([g, w])
# next two lines are not required here
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.apply_gradients(updated_gv, global_step=global_step)
sess.run(tf.global_variables_initializer())
def train_step(x_batch, y_batch):
feed_dict = {
}
_, step, _update, loss, accuracy = sess.run([train_op, global_step, w_update, ffn.loss, ffn.accuracy],
feed_dict={ffn.x: x_batch, ffn.y: y_batch})
print("step {}, loss {:g}, acc {:g}".format(step, loss, accuracy))
batch_size = 32
s_idx = - batch_size
for batch_index in range(2):
s_idx += batch_size
e_idx = s_idx + batch_size
x_batch = x_train[s_idx:e_idx]
y_batch = y_train[s_idx:e_idx]
train_step(x_batch, y_batch)
current_step = tf.train.global_step(sess, global_step)
但是,上述代码(及其他类似的变体)不会影响实际权重。我假设使用tf.Variable.assign...
或tf.assign...
的任何作业都会被制作为原始变量的某些副本。
执行预期更新的最有意义的方法是什么?
答案 0 :(得分:0)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
此行只会在代码中返回一个空列表。
如果您想在计算和应用渐变后执行(手动)体重变化,可以将其分为两个步骤:
sess.run([your normal gradient step])
然后
sess.run([your weight update ops])