In my train() method, the variable that stores the logits returned from the first call to the network doesn't work. The relevant section looks like this:
with tf.variable_scope("logits_out") as scope:
    logits_out = network(image_batch)
    scope.reuse_variables()
    v_logits_out = network(v_image_batch)
    scope.reuse_variables()
    reused_logits_out = network(image_batch)
The first and second calls to network() are for the original training images and the validation images respectively (I run validation every 100 training steps). The output of logits_out from the first call stays constant (as shown in TensorBoard), while the second call produces varying values (also shown in TensorBoard). So I added a third call on the original images, shown above as reused_logits_out. That call produces non-constant guesses, but the loss never changes. The loss function uses the third call to network(), yet it shows no change, and the TensorBoard logs agree. However, with that loss function the output I get from infer_out does change! If I instead use the first network call in the loss function, the guess is a constant and never changes.
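For anyone debugging a similar setup, one thing worth ruling out first is whether the three network() calls actually share a single set of weights. In TF1, scope.reuse_variables() only shares variables created through tf.get_variable; weights created with tf.Variable are duplicated on every call. A minimal sanity check (my addition, not from the original post), assuming the "logits_out" scope above has been built:

# If reuse is working, exactly one copy of each layer's weights should
# appear under the "logits_out" scope, no matter how many times network() ran.
scope_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="logits_out")
for v in scope_vars:
    print(v.name, v.shape)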
def network(image_batch):
    # 3x3 patch, 3 input channels (the image), 32 output channels
    ######################################
    # First layer Convolution
    ######################################
    W_conv1 = weight_variable([3, 3, 3, 32])
    b_conv1 = bias_variable([32])
    # Reshape the image batch to [N, IMAGE_SIZE, IMAGE_SIZE, 3] (height/width and color channels)
    x_image = tf.reshape(image_batch, [-1, IMAGE_SIZE, IMAGE_SIZE, 3]) # 64
    tf.summary.image('input', x_image, 10)
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    #h_pool1 = max_pool_2x2(h_conv1)
    ######################################
    # Second layer Convolution M6-1 network [POOLED]
    ######################################
    # 3x3 patch, 32 input channels (the previous output channels), 64 output channels
    W_conv2 = weight_variable([3, 3, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2) # 32
    ######################################
    # Third layer Convolution M6-1 network
    ######################################
    # 3x3 patch, 64 input channels (the previous output channels), 128 output channels
    W_conv3 = weight_variable([3, 3, 64, 128])
    b_conv3 = bias_variable([128])
    h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
    #h_pool3 = max_pool_2x2(h_conv3)
    ######################################
    # Fourth layer Convolution M6-1 network
    ######################################
    # 4x4 patch, 128 input channels (the previous output channels), 256 output channels
    W_conv4 = weight_variable([4, 4, 128, 256])
    b_conv4 = bias_variable([256])
    h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4)
    h_pool4 = max_pool_2x2(h_conv4) # 16
    # 5x5 patch, 256 input channels, 256 output channels
    W_conv5 = weight_variable([5, 5, 256, 256])
    b_conv5 = bias_variable([256])
    h_conv5 = tf.nn.relu(conv2d(h_pool4, W_conv5) + b_conv5)
    h_pool5 = max_pool_2x2(h_conv5) # 8
    #was 7*7*256 which makes sense
    ######################################
    # Fifth layer fully connected M6-1 network
    ######################################
    # The image is now 8 x 8 after 3 max pools and we have 256 channels, so
    # we want a matrix of 8 * 8 * 256 inputs and then 2048 neurons
    W_fc1 = weight_variable([8 * 8 * 256, 2048])
    b_fc1 = bias_variable([2048])
    h_pool5_flat = tf.reshape(h_pool5, [-1, 8 * 8 * 256])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool5_flat, W_fc1) + b_fc1)
    #tf.summary.histogram('h_fc1', h_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, 1.0) # keep_prob 1.0: dropout effectively disabled
    ######################################
    # 6th FC layer fully connected M6-1 network
    ######################################
    # 2048 inputs down to 256 neurons
    W_fc2 = weight_variable([2048, 256])
    b_fc2 = bias_variable([256])
    #variable_summaries(W_fc2, name="2nd_FC_Weights")
    #variable_summaries(b_fc2, name="2nd_FC_Bias")
    h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    #tf.summary.histogram('h_fc2', h_fc2)
    h_fc2_drop = tf.nn.dropout(h_fc2, 1.0)
    ######################################
    # 7th FC layer fully connected M6-1 network
    ######################################
    W_fc3 = weight_variable([256, 256])
    b_fc3 = bias_variable([256])
    #variable_summaries(W_fc3, name="3rd_FC_Weights")
    #variable_summaries(b_fc3, name="3rd_FC_Bias")
    h_fc3 = tf.nn.relu(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)
    #tf.summary.histogram('h_fc3', h_fc3)
    ######################################
    # Final FC layer fully connected M6-1 network
    ######################################
    W_fc4 = weight_variable([256, 1])
    b_fc4 = bias_variable([1])
    #variable_summaries(W_fc4, name="4th_FC_Weights")
    #variable_summaries(b_fc4, name="4th_FC_Bias")
    #y_conv = tf.nn.softmax(tf.matmul(h_fc3, W_fc4) + b_fc4)
    # Single regression output, squashed into (-pi, pi) by 2 * atan(x)
    y_conv = tf.multiply(tf.atan(tf.matmul(h_fc3, W_fc4) + b_fc4), 2)
    variable_summaries(y_conv, name="net_out")
    return y_conv
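Note that weight_variable and bias_variable are not shown in the post; for the scope.reuse_variables() calls above to actually share weights, they would have to be built on tf.get_variable rather than tf.Variable. A reuse-compatible sketch (an assumption about helpers the post doesn't show) might look like the following; each call site would then pass a unique name, e.g. weight_variable([3, 3, 3, 32], name="W_conv1"), or wrap each layer in its own sub-scope so the variables don't collide:

# Hedged sketch of reuse-compatible helpers; the original implementations
# are not shown in the question, so these are assumptions.
def weight_variable(shape, name):
    # tf.get_variable participates in variable_scope reuse; tf.Variable does not.
    return tf.get_variable(name, shape,
                           initializer=tf.truncated_normal_initializer(stddev=0.1))

def bias_variable(shape, name):
    return tf.get_variable(name, shape,
                           initializer=tf.constant_initializer(0.1))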
def train():
    image_batch_out, label_batch_out, filename_batch = input(if_eval=False)
    v_image_batch_out, v_label_batch_out, v_filename_batch = v_input()
    image_batch_placeholder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, None, None, 3])
    v_image_batch_placeholder = tf.placeholder(tf.float32, shape=[v_BATCH_SIZE, None, None, 3])
    image_batch = tf.reshape(image_batch_out, (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3))
    v_image_batch = tf.reshape(v_image_batch_out, (v_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3))
    label_batch_placeholder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, NUM_CLASSES])
    label_tensor_placeholder = tf.placeholder(tf.int64, shape=[v_BATCH_SIZE])
    label_offset = -tf.ones([BATCH_SIZE], dtype=tf.int64, name="label_batch_offset")
    v_label_offset = -tf.ones([v_BATCH_SIZE], dtype=tf.int64, name="label_batch_offset")
    label_batch_one_hot = tf.one_hot(tf.add(label_batch_out, label_offset), depth=NUM_CLASSES, on_value=1.0, off_value=0.0)
    label_batch = tf.add(label_batch_out, label_offset)
    v_label_batch = tf.add(v_label_batch_out, v_label_offset)
    with tf.variable_scope("logits_out") as scope:
        logits_out = network(image_batch)
        scope.reuse_variables()
        v_logits_out = network(v_image_batch)
        scope.reuse_variables()
        reused_logits_out = network(image_batch)
    logits_batch = tf.to_int64(tf.arg_max(v_logits_out, dimension=1))
    #loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(labels=label_batch_one_hot, logits=logits_out))
    #prediction_op = tf.nn.softmax(logits_out)
    correct_prediction = tf.equal(logits_batch, label_tensor_placeholder)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    #transposed_logits = tf.transpose(logits_out)
    #loss = tf.losses.mean_squared_error(labels=label_batch_placeholder, predictions=prediction_op)
    loss = tf.reduce_mean(tf.square(reused_logits_out - label_batch_placeholder))
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    #variable_summaries(prediction_op, name="Predictions")
    #surity_summary(prediction_op, name="Certainty")
    tf.summary.scalar("loss", loss)
    merged_summary_op = tf.summary.merge_all()
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        if(tf.train.checkpoint_exists(chk_path)):
            saver.restore(sess, chk_path)
        else:
            saver.save(sess, chk_path)
        epoch_file = open(epoch_path, 'r')
        epoch = int(epoch_file.read())
        epoch_file.close()
        accu_file = open(best_path, 'r')
        best_accu = float(accu_file.read())
        accu_file.close()
        improved = 0
        not_improved = 0
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        for j in range(1, 1000):
            epoch_file = open(epoch_path, 'w')
            epoch_file.write(str(epoch))
            epoch_file.close()
            for i in range(int(TRAINING_SET_SIZE/BATCH_SIZE)):
                image_out, label_out, label_batch_one_hot_out, filename_out = sess.run([image_batch, label_batch_out, label_batch_one_hot, filename_batch])
                _, infer_out, loss_out, summary, global_step_out = sess.run([train_step, logits_out, loss, merged_summary_op, global_step], feed_dict={image_batch_placeholder: image_out, label_batch_placeholder: label_batch_one_hot_out})
                #print(image_out.shape)
                #print("label_out: ")
                #print(filename_out)
                #print(label_out)
                #print(label_batch_one_hot_out)
                #print("infer_out: ")
                #print(infer_out)
                #print("prediction: ")
                #print(predict_out)
                print("loss: " + str(loss_out))
                print("local step: " + str(i))
                print("global step: " + str(global_step_out - 1))
                print("epoch: " + str(epoch))
                print("improved: " + str(improved))
                print("not improved: " + str(not_improved))
                label_value = label_out
                estimate = infer_out
                err = label_value - estimate
                for k in range(0, 1):
                    print("label value: ", label_value[k], \
                          "estimated value: ", estimate[k])#, \
                          # "err: ", err[i])
                print("[*]==============================")
                if(i % 100 == 0 and i != 0):
                    accuracy_accu = []
                    for p in range(int(v_TRAINING_SET_SIZE/v_BATCH_SIZE)):
                        v_image_out, v_label_out, v_filename_out = sess.run([v_image_batch, v_label_batch, v_filename_batch])
                        v_accuracy_out, v_logits_batch_out = sess.run([accuracy, logits_batch], feed_dict={v_image_batch_placeholder: v_image_out, label_tensor_placeholder: v_label_out})
                        accuracy_accu.append(v_accuracy_out)
                        print(p)
                        print("accuracy: ", v_accuracy_out)
                        print(v_label_out)
                    print("Accuracy: ")
                    print(np.mean(accuracy_accu))
                    print("Best Accuracy: ")
                    print(best_accu)
                    saver.save(sess, chk_path)
                    if((np.mean(accuracy_accu)) > best_accu):
                        print("improved")
                        saver.save(sess, best_chk_path)
                        print("saved")
                        best_accu = np.mean(accuracy_accu)
                        accu_file = open(best_path, 'w')
                        accu_file.write(str(best_accu))
                        accu_file.close()
                        print("New best accuracy!")
                        improved += 1
                    else:
                        print("not improved")
                        not_improved += 1
                if(i % 10 == 0):
                    summary_writer.add_summary(summary, global_step_out)
            epoch += 1
        coord.request_stop()
        coord.join(threads)
        sess.close()
train()
Any hints pointing me in the right direction would be great. I am attaching TensorBoard images from training to show what I mean by the different calls to network():

(Image: variable summaries for the different calls to network())

(Image: the loss function; in this image it is using the third call to the network, which appears to be guessing)
Answer 0 (score: 0)
I keep asking silly questions, but for anyone who runs into this problem in the future, here is the answer.

I found that because I went from many output classes down to a single output class, the network's convergence sped up dramatically. The pictures really just show a network that converges to the same answer for everything within only a few training steps. To fix this, you can set the learning rate to 1/10000th of the original (or thereabouts) and get a trainable network. In my case, the classifier version of this architecture learned fine at a rate of .001, but when given a single output neuron I needed a learning rate of .00000001 to converge to a more acceptable solution. Good luck!
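In code terms the fix is just the optimizer's step size. A minimal sketch against the train() above, where the exact value came from trial and error, not a general rule:

# The classifier version trained fine at 1e-3, but the single-output
# regression head needed a far smaller step to avoid collapsing to a
# constant answer within a few steps.
learning_rate = 0.00000001  # was 0.001
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)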