TensorFlow GradientDescentOptimizer only updates the biases

Time: 2019-10-26 13:07:57

Tags: tensorflow machine-learning

There is a small educational example in TensorFlow that classifies the handwritten digits 0, 1, and 2 from the MNIST dataset. It is simple, but I cannot figure out why it only updates the biases while the weights stay unchanged.

# sandbox.py
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from quiz import get_weights, get_biases, linear

def mnist_features_labels(n_labels):
    """
    Gets the first <n> labels from the MNIST dataset
    :param n_labels: Number of labels to use
    :return: Tuple of feature list and label list
    """
    mnist_features = []
    mnist_labels = []

    mnist = input_data.read_data_sets('/datasets/ud730/mnist', one_hot=True)

    # In order to make quizzes run faster, we're only looking at 10000 images
    for mnist_feature, mnist_label in zip(*mnist.train.next_batch(10000)):

        # Add the example if its label is among the first <n> classes
        if mnist_label[:n_labels].any():
            mnist_features.append(mnist_feature)
            mnist_labels.append(mnist_label[:n_labels])

    return mnist_features, mnist_labels



# Number of features (28*28 image is 784 features)
n_features = 784
# Number of labels
n_labels = 3

# Features and Labels
features = tf.placeholder(tf.float32)
labels = tf.placeholder(tf.float32)

# Weights and Biases
w = get_weights(n_features, n_labels)
b = get_biases(n_labels)

# Linear Function xW + b
logits = linear(features, w, b)

# Training data
train_features, train_labels = mnist_features_labels(n_labels)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    print("w_init:", session.run(w)) # show the initial values of the weights
    print("b_init:", session.run(b)) # and the biases

    prediction = tf.nn.softmax(logits)
    cross_entropy = -tf.reduce_sum(labels * tf.log(prediction), reduction_indices=1)
    #cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)

    loss = tf.reduce_mean(cross_entropy)

    learning_rate = 0.08

    # This is the method used to train the model
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Run optimizer and get loss
    _, l = session.run(
        [optimizer, loss],
        feed_dict={features: train_features, labels: train_labels})

    print("w_new:", session.run(w)) # show the updated values of the weights
    print("b_new:", session.run(b)) # and the biases

print('Loss: {}'.format(l))
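
To double-check, one could compare the full weight matrix before and after the step instead of relying on the elided printout. A minimal diagnostic sketch of my own, wrapped around the existing training step (it uses NumPy, which the MNIST loader already depends on):

# hypothetical diagnostic: compare the full 784x3 weight matrix around the step
import numpy as np

w_before = session.run(w)  # full copy of the weights before the update
_, l = session.run(
    [optimizer, loss],
    feed_dict={features: train_features, labels: train_labels})
w_after = session.run(w)   # full copy after the update

print("max |delta w|:", np.abs(w_after - w_before).max())
print("entries that moved:", np.count_nonzero(w_after != w_before))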

The following code shows how the weight and bias variables are created. It also defines the linear() function:

# quiz.py
import tensorflow as tf

def get_weights(n_features, n_labels):
    weights = tf.Variable(tf.truncated_normal(shape=(n_features, n_labels), dtype=tf.float32))
    #weights = tf.get_variable(name='weights', shape=(n_features, n_labels), dtype=tf.float32, 
    #   initializer=tf.glorot_normal_initializer())
    #weights = tf.Variable(tf.truncated_normal(shape=(n_features, n_labels), dtype=tf.float32), trainable=False)
    #weights = tf.truncated_normal(shape=(n_features, n_labels), dtype=tf.float32)
    return weights


def get_biases(n_labels):
    biases = tf.Variable(tf.zeros(n_labels))
    return biases

def linear(input, w, b):
    # Linear Function (xW + b)
    m = tf.add(tf.matmul(input, w), b)
    return m
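
Since one of the commented-out variants sets trainable=False and another returns a plain tensor rather than a tf.Variable, it might also be worth confirming that w is actually registered as a trainable variable. A quick sketch of that check, run inside the sandbox session:

# hypothetical sanity check: the optimizer only updates variables in this collection
print([v.name for v in tf.trainable_variables()])
# both the weights and the biases variables should be listed here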

The code prints the following:

w_init:  [[-0.9414071  -0.14461778  1.4990447 ]
 [ 1.0635881  -1.3494984  -0.383863  ]
 [-1.2310774   0.28843132 -0.15049268]
 ...
 [ 0.6403666   1.1087595  -1.6045712 ]
 [-1.1065984   0.9918786   1.2289807 ]
 [ 0.9406288  -0.87747234  0.3908258 ]]
b_init:  [0. 0. 0.]

w_new: [[-0.9414071  -0.14461778  1.4990447 ]
 [ 1.0635881  -1.3494984  -0.383863  ]
 [-1.2310774   0.28843132 -0.15049268]
 ...
 [ 0.6403666   1.1087595  -1.6045712 ]
 [-1.1065984   0.9918786   1.2289807 ]
 [ 0.9406288  -0.87747234  0.3908258 ]]
b_new: [ 0.02507469 -0.01238031 -0.01269451]
Loss: 9.170862197875977

So it looks like a gradient step was performed, but it changed only the biases, not the weights. It does not seem to matter how I initialize the weights or which cross-entropy function I use (I also tried softmax_cross_entropy_with_logits). Can anyone explain this behavior?
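
To distinguish "the gradient for w is exactly zero" from "the update is too small to show up in the printout", one could also inspect the raw gradient the optimizer sees. A sketch of my own using the standard TF1 tf.gradients call, in the same graph and session as sandbox.py:

# hypothetical gradient check for the weights
grad_w = tf.gradients(loss, w)[0]
g = session.run(grad_w,
                feed_dict={features: train_features, labels: train_labels})
print("gradient max |g|:", abs(g).max())
print("rows with a nonzero gradient:", (abs(g).sum(axis=1) > 0).sum())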

0 Answers:

There are no answers yet