TensorFlow中有一个小型的教育示例,该示例对MNIST数据集中的手写数字0、1和2进行了分类。这很简单,但是我无法弄清楚为什么它只更新偏差而权重保持不变。
# sandbox.py
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from quiz import get_weights, get_biases, linear
def mnist_features_labels(n_labels):
"""
Gets the first <n> labels from the MNIST dataset
:param n_labels: Number of labels to use
:return: Tuple of feature list and label list
"""
mnist_features = []
mnist_labels = []
mnist = input_data.read_data_sets('/datasets/ud730/mnist', one_hot=True)
# In order to make quizzes run faster, we're only looking at 10000 images
for mnist_feature, mnist_label in zip(*mnist.train.next_batch(10000)):
# Add features and labels if it's for the first <n>th labels
if mnist_label[:n_labels].any():
mnist_features.append(mnist_feature)
mnist_labels.append(mnist_label[:n_labels])
return mnist_features, mnist_labels
# Number of features (28*28 image is 784 features)
n_features = 784
# Number of labels
n_labels = 3
# Features and Labels
features = tf.placeholder(tf.float32)
labels = tf.placeholder(tf.float32)
# Weights and Biases
w = get_weights(n_features, n_labels)
b = get_biases(n_labels)
# Linear Function xW + b
logits = linear(features, w, b)
# Training data
train_features, train_labels = mnist_features_labels(n_labels)
with tf.Session() as session:
session.run(tf.global_variables_initializer())
print("w_init:", session.run(w)) # show the initial values of the weights
print("b_init:", session.run(b)) # and the biases
prediction = tf.nn.softmax(logits)
cross_entropy = -tf.reduce_sum(labels * tf.log(prediction), reduction_indices=1)
#cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
loss = tf.reduce_mean(cross_entropy)
learning_rate = 0.08
# This is the method used to train the model
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
# Run optimizer and get loss
_, l = session.run(
[optimizer, loss],
feed_dict={features: train_features, labels: train_labels})
print("w_new:", session.run(w)) # show the updated values of the weights
print("b_new:", session.run(b)) # and the biases
print('Loss: {}'.format(l))
以下代码显示了如何创建权重和偏差变量。它还定义了linear()
函数:
# quiz.py
import tensorflow as tf
def get_weights(n_features, n_labels):
weights = tf.Variable(tf.truncated_normal(shape=(n_features, n_labels), dtype=tf.float32))
#weights = tf.get_variable(name='weights', shape=(n_features, n_labels), dtype=tf.float32,
# initializer=tf.glorot_normal_initializer())
#weights = tf.Variable(tf.truncated_normal(shape=(n_features, n_labels), dtype=tf.float32), trainable=False)
#weights = tf.truncated_normal(shape=(n_features, n_labels), dtype=tf.float32)
return weights
def get_biases(n_labels):
biases = tf.Variable(tf.zeros(n_labels))
return biases
def linear(input, w, b):
# Linear Function (xW + b)
m = tf.add(tf.matmul(input, w), b)
return m
该代码显示以下内容:
w_init: [[-0.9414071 -0.14461778 1.4990447 ]
[ 1.0635881 -1.3494984 -0.383863 ]
[-1.2310774 0.28843132 -0.15049268]
...
[ 0.6403666 1.1087595 -1.6045712 ]
[-1.1065984 0.9918786 1.2289807 ]
[ 0.9406288 -0.87747234 0.3908258 ]]
b_init: [0. 0. 0.]
w_new: [[-0.9414071 -0.14461778 1.4990447 ]
[ 1.0635881 -1.3494984 -0.383863 ]
[-1.2310774 0.28843132 -0.15049268]
...
[ 0.6403666 1.1087595 -1.6045712 ]
[-1.1065984 0.9918786 1.2289807 ]
[ 0.9406288 -0.87747234 0.3908258 ]]
b_new: [ 0.02507469 -0.01238031 -0.01269451]
Loss: 9.170862197875977
因此,它看起来像执行了梯度步骤,但它仅更改了偏差,而不更改了权重。我如何初始化权重和使用的交叉熵函数似乎都无关紧要(我尝试过softmax_cross_entropy_with_logits
)。谁能解释这种行为?