I am trying to print the weights before and after training in TensorFlow. I am confused by what I get, because the weights do not seem to change even though training shows the cost going down. My code is:
from __future__ import print_function
import tensorflow as tf
import numpy as np
import argparse

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
learning_rate = 0.01
batch_size = 100
display_step = 1
load = False
max_epochs = 5
training_epochs = max_epochs

# Instantiate saver
if not load:
    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, 784], name='x')  # mnist data image of shape 28*28=784
    y = tf.placeholder(tf.float32, [None, 10], name='y')   # 0-9 digits recognition => 10 classes

    # Set model weights
    W = tf.get_variable('W', initializer=tf.random_normal((784, 10), seed=0))
    b = tf.get_variable('b', [10], initializer=tf.zeros_initializer)

    # Construct model
    #pred = tf.nn.softmax(tf.matmul(x, W) + b, name='pred')  # Softmax
    pred = tf.nn.softmax(tf.matmul(x, W), name='pred')  # Softmax

    # Minimize error using cross entropy
    cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))

    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # In order to be able to easily retrieve variables and ops later,
    # we add them to collections
    tf.add_to_collection('train_op', optimizer)
    tf.add_to_collection('cost_op', cost)
    tf.add_to_collection('input', x)
    tf.add_to_collection('target', y)
    tf.add_to_collection('pred', pred)

    initial_epoch = 0
else:
    # Find last executed epoch
    from glob import glob
    history = list(map(lambda x: int(x.split('-')[1][:-5]), glob('model.ckpt-*.meta')))
    last_epoch = np.max(history)

    # Instantiate saver object using previously saved meta-graph
    saver = tf.train.import_meta_graph('model.ckpt-{}.meta'.format(last_epoch))
    initial_epoch = last_epoch + 1

# Launch the graph
with tf.Session() as sess:
    if not load:
        sess.run(init)
    else:
        saver.restore(sess, 'model.ckpt-{}'.format(last_epoch))
        optimizer = tf.get_collection('train_op')[0]
        cost = tf.get_collection('cost_op')[0]
        x = tf.get_collection('input')[0]
        y = tf.get_collection('target')[0]
        pred = tf.get_collection('pred')[0]

    print("Variables before training")
    for var in tf.global_variables():
        print(var.name, sess.run(var))

    # Training cycle
    for epoch in range(initial_epoch, training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs,
                                                          y: batch_ys})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
        saver.save(sess, './model.ckpt', global_step=epoch)

    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    print("Variables after training")
    for var in tf.global_variables():
        print(var.name, sess.run(var))
Before training, printing the variables shows something like this for W:

W:0 [[-0.22279324  0.75145274  0.30694658 ... -0.20406865 -0.10345581
   0.47926915]
 ....
After training, printing produces exactly the same values, even though the cost went down from 2.2 to 0.7. Where is my mistake?
Answer 0 (score: 2)
Your weights do change during training. I cannot be sure, but I think you are not seeing it because you are only printing part of the weights, and those parts happen to be identical. I changed the code slightly to add a numpy.array_equal comparison and a check inside the training loop, as follows:
from __future__ import print_function
import tensorflow as tf
import numpy as np
import argparse

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
learning_rate = 0.01
batch_size = 100
display_step = 1
load = False
max_epochs = 5
training_epochs = max_epochs

# Instantiate saver
if not load:
    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, 784], name='x')  # mnist data image of shape 28*28=784
    y = tf.placeholder(tf.float32, [None, 10], name='y')   # 0-9 digits recognition => 10 classes

    # Set model weights
    W = tf.get_variable('W', initializer=tf.random_normal((784, 10), seed=0))
    b = tf.get_variable('b', [10], initializer=tf.zeros_initializer)

    # Construct model
    #pred = tf.nn.softmax(tf.matmul(x, W) + b, name='pred')  # Softmax
    pred = tf.nn.softmax(tf.matmul(x, W), name='pred')  # Softmax

    # Minimize error using cross entropy
    cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))

    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # In order to be able to easily retrieve variables and ops later,
    # we add them to collections
    tf.add_to_collection('train_op', optimizer)
    tf.add_to_collection('cost_op', cost)
    tf.add_to_collection('input', x)
    tf.add_to_collection('target', y)
    tf.add_to_collection('pred', pred)

    initial_epoch = 0
else:
    # Find last executed epoch
    from glob import glob
    history = list(map(lambda x: int(x.split('-')[1][:-5]), glob('model.ckpt-*.meta')))
    last_epoch = np.max(history)

    # Instantiate saver object using previously saved meta-graph
    saver = tf.train.import_meta_graph('model.ckpt-{}.meta'.format(last_epoch))
    initial_epoch = last_epoch + 1

# Launch the graph
with tf.Session() as sess:
    if not load:
        sess.run(init)
    else:
        saver.restore(sess, 'model.ckpt-{}'.format(last_epoch))
        optimizer = tf.get_collection('train_op')[0]
        cost = tf.get_collection('cost_op')[0]
        x = tf.get_collection('input')[0]
        y = tf.get_collection('target')[0]
        pred = tf.get_collection('pred')[0]

    print("Variables before training")
    old_var = {}
    for var in tf.global_variables():
        old_var[var.name] = sess.run(var)
        #print(var.name, sess.run(var))
    print(old_var)

    new_var = {}
    # Training cycle
    for epoch in range(initial_epoch, training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs,
                                                          y: batch_ys})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
        print('Check variable changes')
        for var in tf.global_variables():
            new_var[var.name] = sess.run(var)
        for vname in new_var:
            eq = np.array_equal(old_var[vname], new_var[vname])
            print('Is {} changed? {}'.format(vname, not eq))
            if not eq:
                old_var[vname] = new_var[vname]
        saver.save(sess, './model.ckpt', global_step=epoch)

    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    print("Variables after training")
    for var in tf.global_variables():
        new_var[var.name] = sess.run(var)
        #print(var.name, sess.run(var))
    print(new_var)

    print('Check variable changes')
    for vname in new_var:
        eq = np.array_equal(old_var[vname], new_var[vname])
        print('Is {} changed? {}'.format(vname, not eq))
The most relevant part of my output is:
Epoch: 0001 cost= 7.935980950
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Epoch: 0002 cost= 4.306569523
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Epoch: 0003 cost= 3.009391170
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Epoch: 0004 cost= 2.379378949
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Epoch: 0005 cost= 2.014794181
Check variable changes
Is W:0 changed? True
Is b:0 changed? False
Optimization Finished!
Your bias, however, is not changing the way you would expect. And if you run this, you will also find that your weights after training are identical to the weights from your last epoch.
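The bias never changes because it has no path to the cost: in your model construction, the line that adds b to the logits is commented out, so b does not appear anywhere in the graph that computes cost and its gradient is always zero. The fix is the commented-out line from your own code, shown here as a one-line sketch:

    # Include b in the logits so gradient descent actually updates it:
    pred = tf.nn.softmax(tf.matmul(x, W) + b, name='pred')  # Softmax with bias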
Answer 1 (score: 2)
Y. Lou is right: your W values actually are changing. Part of the reason it is not obvious is that the values you see when printing W are its corners and edges, which are the parts that change the least. I added some plotting that shows the absolute difference between W and the initial W, with epochs on the vertical axis and digit classes on the horizontal; stronger colors correspond to larger differences.

Note that the top-left and bottom-right corners barely change (often with literally zero change); these are exactly the values you see in the printed (abbreviated) summary of W, which explains why it never seems to change. This all makes sense, since the corner pixels are almost never useful for predicting a digit's class.
from __future__ import print_function
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
learning_rate = 0.01
batch_size = 100
display_step = 1
load = False
max_epochs = 5
training_epochs = max_epochs

# Instantiate saver
if not load:
    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, 784], name='x')  # mnist data image of shape 28*28=784
    y = tf.placeholder(tf.float32, [None, 10], name='y')   # 0-9 digits recognition => 10 classes

    # Set model weights
    W = tf.get_variable('W', initializer=tf.random_normal((784, 10), seed=0))

    # Construct model
    pred = tf.nn.softmax(tf.matmul(x, W), name='pred')  # Softmax

    # Minimize error using cross entropy
    cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))

    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_oper = optimizer.minimize(cost)
    print(optimizer.compute_gradients(cost))

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # In order to be able to easily retrieve variables and ops later,
    # we add them to collections
    tf.add_to_collection('train_op', train_oper)
    tf.add_to_collection('cost_op', cost)
    tf.add_to_collection('input', x)
    tf.add_to_collection('target', y)
    tf.add_to_collection('pred', pred)

    initial_epoch = 0

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    print("Variables before training")
    for var in tf.get_default_graph().get_collection('trainable_variables'):
        print(var, sess.run(var))

    # Initial value of W
    W_0 = W.eval(sess)

    # Training cycle
    diff = []
    for epoch in range(initial_epoch, training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_oper, cost], feed_dict={x: batch_xs,
                                                           y: batch_ys})
            # Compute average loss
            avg_cost += c / total_batch
        # Calculate difference between current W and initial W
        diff.append(np.reshape(np.abs(W_0 - sess.run(W)), (28, 28, 10)))
        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
        saver.save(sess, './model.ckpt', global_step=epoch)

    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    print("Variables after training")
    for var in tf.get_default_graph().get_collection('trainable_variables'):
        print(var, sess.run(var))

scale = np.max(diff)
fig, axs = plt.subplots(len(diff), 10)
for i in range(len(diff)):
    for j in range(10):
        axs[i][j].imshow(diff[i][:, :, j] / scale, vmin=0, vmax=1)
        axs[i][j].axis('off')
plt.show(fig)
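If you want to confirm this from printed values alone, keep in mind that numpy abbreviates large arrays and shows only the corner entries. A minimal sketch (hypothetical inspection code, not part of the script above; it must run inside the session) that looks at the middle of W instead:

    # Rows of W correspond to pixels in row-major order, so rows around
    # index 14*28 come from the center of the 28x28 image, where the
    # weights change the most.
    print(sess.run(W)[14 * 28 : 14 * 28 + 5])

    # Alternatively, disable numpy's abbreviated printing altogether:
    np.set_printoptions(threshold=np.inf)
    print(sess.run(W))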