首先,我可以确认训练批次(train batch)是正确的:
批大小为 8,输出和张量形状如上所示;图像已经过预处理,并且可以用 OpenCV
正常显示。
问题是我的训练损失不能下降 参见下面的图片和代码:
这是tensorboard
这是代码:
#import package
import tensorflow as tf
import os
# provides the training batches (batch_size=32)
import cifar10_input_pipeline
# images shape can be [batch_size, height, width, 3]
def _conv_relu_pool(inputs, scope, in_channels, out_channels, pool_name):
    """Build one 3x3 conv -> bias -> ReLU -> 2x2 max-pool stage under *scope*."""
    with tf.variable_scope(scope):
        kernel = tf.get_variable('weights',
                                 [3, 3, in_channels, out_channels],
                                 tf.float32,
                                 initializer=tf.random_normal_initializer(stddev=1e-3))
        bias = tf.get_variable('bias',
                               [out_channels],
                               tf.float32,
                               initializer=tf.zeros_initializer())
        conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME', name='conv')
        activated = tf.nn.relu(tf.nn.bias_add(conv, bias), name='relu')
        return tf.nn.max_pool(activated, [1, 2, 2, 1], [1, 2, 2, 1],
                              padding='VALID', name=pool_name)


def inference(images):
    """Build the forward pass of the network and return the unscaled logits.

    The graph is two conv/ReLU/max-pool stages ('conv1' with 32 filters,
    'conv2' with 64 filters), a 384-unit sigmoid layer ('fc1') and a 10-way
    linear output layer ('softmax_linear').

    Args:
        images: 4-D float tensor laid out as [batch_size, height, width, 3].
            Height, width and channels must be statically known; the batch
            dimension may be unknown.

    Returns:
        A [batch_size, 10] tensor of logits (no softmax applied).

    Raises:
        ValueError: if the flattened feature size cannot be determined from
            the static shape of the second pooling output.
    """
    # NOTE(review): every weight is initialised with stddev=1e-3. Weights this
    # small, combined with the sigmoid in fc1, may leave the gradients too
    # small for plain SGD to make progress -- consider a larger or
    # fan-in-scaled initialiser. Left unchanged here to preserve behaviour.
    pool1 = _conv_relu_pool(images, 'conv1', 3, 32, 'pool1')
    pool2 = _conv_relu_pool(pool1, 'conv2', 32, 64, 'pool2')

    # Take the feature size from the static shape of pool2 and let reshape
    # infer the batch dimension, so a batch size of None no longer breaks
    # the reshape.
    dim = pool2.get_shape()[1:].num_elements()
    if dim is None:
        raise ValueError('inference() requires statically known height, '
                         'width and channel dimensions')
    flatten = tf.reshape(pool2, shape=[-1, dim])

    with tf.variable_scope('fc1'):
        weights = tf.get_variable('weights',
                                  [dim, 384],
                                  tf.float32,
                                  initializer=tf.random_normal_initializer(stddev=1e-3))
        bias = tf.get_variable('bias',
                               [384],
                               tf.float32,
                               initializer=tf.zeros_initializer())
        fc1 = tf.matmul(flatten, weights, name='fc')
        fc1 = tf.nn.sigmoid(tf.nn.bias_add(fc1, bias), name='sigmoid')

    with tf.variable_scope('softmax_linear'):
        weights = tf.get_variable('weights',
                                  [384, 10],
                                  tf.float32,
                                  initializer=tf.random_normal_initializer(stddev=1e-3))
        bias = tf.get_variable('bias',
                               [10],
                               tf.float32,
                               initializer=tf.zeros_initializer())
        fc2 = tf.matmul(fc1, weights, name='fc')
        # Final fully connected layer; softmax is applied inside the loss op.
        logits = tf.nn.bias_add(fc2, bias, name='logits')

    print('inference sucess')
    return logits
# calculate loss
def loss(logits, labels):
    """Return the batch mean of the sparse softmax cross-entropy.

    Args:
        logits: unscaled class scores, as produced by `inference`.
        labels: integer class ids, one per example.

    Returns:
        A scalar tensor named 'loss'.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels,
        logits=logits,
        name='cross_entropy')
    mean_loss = tf.reduce_mean(per_example, name='loss')
    print('loss sucess')
    return mean_loss
#return training op
def train(loss):
    """Return an op that applies one gradient-descent step (lr=0.01) to *loss*."""
    step_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)
    print('train sucess')
    return step_op
#calculate accuracy, use train data batch
def accuracy(logits, labels):
    """Return the fraction of examples whose arg-max logit equals the label.

    Args:
        logits: [batch_size, num_classes] tensor of class scores.
        labels: int32 tensor of class ids (compared against int32 arg-max
            predictions).

    Returns:
        A scalar float32 tensor named 'accuracy', in the range [0, 1].
    """
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    correct = tf.equal(predictions, labels)
    # Cast to float before averaging: reduce_mean on an integer tensor uses
    # integer division, which truncates the result to 0 unless every
    # prediction in the batch is correct.
    return tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
if __name__ == '__main__':
    # Script entry point: build the graph, then run 1000 training steps while
    # writing the loss summary to ./test_model.
    data_dir = '/home/mao/Notebooks/cifar10/cifar-10-batches-bin/'
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in range(1, 6)]
    example_batch = cifar10_input_pipeline.input_pipeline(filenames, batch_size=32, num_epochs=None)
    images = example_batch[0]
    labels = example_batch[1]

    _logits = inference(images)
    _loss = loss(_logits, labels)
    _train_op = train(_loss)

    sess = tf.Session()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    tf.summary.FileWriterCache.clear()
    writer = tf.summary.FileWriter('./test_model', sess.graph)
    Loss = tf.summary.scalar('Loss', _loss)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    try:
        for i in range(1000):
            if coord.should_stop():
                # Nothing more can be trained once a stop was requested.
                break
            if i % 100 == 0:
                # Fetch the labels in the SAME run as the train op. A separate
                # sess.run(labels) would (assuming the input pipeline is
                # queue-based -- TODO confirm) pull a fresh batch, so the
                # printed labels would not be the ones trained on and one
                # batch would be skipped.
                _, sum1, label_batch = sess.run([_train_op, Loss, labels])
                writer.add_summary(sum1, i)
                print(label_batch)
            else:
                _, sum1 = sess.run([_train_op, Loss])
                writer.add_summary(sum1, i)
    except tf.errors.OutOfRangeError:
        print('catch OutOfRangeError')
    finally:
        coord.request_stop()
        coord.join(threads)
        writer.flush()
        writer.close()
        sess.close()
更多详细信息:
以下是执行 sess.run(train_op) 时的损失摘要(loss summary)和标签批次;再次说明,我确定训练时标签和图像批次已被打乱(shuffle)。
那么,函数调用中是否有任何错误?即使删除输出层(logits)中的 tf.nn.relu,损失仍然无法下降。我很困惑。
有人可以帮忙吗?
谢谢!