我预计TensorFlow中tf.sign()的渐变等于0或None。然而,当我检查梯度时,我发现它们等于非常小的数字(例如1.86264515e-09)。那是为什么?
(如果你很好奇为什么我甚至想知道这一点,那是因为我想实现"直通估算器"描述here,然后覆盖渐变tf.sign(),我想检查默认行为实际上是我期待的。)
编辑:这是一些重现错误的代码。该模型只是从TensorFlow引入的线性回归模型,除了我使用y = sign(W)x + b而不是y = Wx + b。
import tensorflow as tf
import numpy as np
def gvdebug(g, v):
g2 = tf.zeros_like(g, dtype=tf.float32)
v2 = tf.zeros_like(v, dtype=tf.float32)
g2 = g
v2 = v
return g2,v2
# Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3
x_data = np.random.rand(100).astype(np.float32)
y_data = x_data * 0.1 + 0.3
# Try to find values for W and b that compute y_data = W * x_data + b
# (We know that W should be 0.1 and b 0.3, but TensorFlow will
# figure that out for us.)
W = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b = tf.Variable(tf.zeros([1]))
y = tf.sign(W) * x_data + b
# Minimize the mean squared errors.
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
grads_and_vars = optimizer.compute_gradients(loss)
gv2 = [gvdebug(gv[0], gv[1]) for gv in grads_and_vars]
apply_grads = optimizer.apply_gradients(gv2)
# Before starting, initialize the variables. We will 'run' this first.
init = tf.initialize_all_variables()
# Launch the graph.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.01)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
sess.run(init)
# Fit the line.
for step in range(201):
sess.run(apply_grads)
if (step % 20 == 0) or ((step-1) % 20 == 0):
print("")
print(sess.run(gv2[0][1])) #the variable
print(sess.run(gv2[0][0])) #the gradient
print("")
print(step, sess.run(W), sess.run(b))
# Learns best fit is W: [0.1], b: [0.3]