在研究张量流时,我遇到了一个问题 成本函数输出' nan' nan。
并且,如果您在源代码中发现任何其他错误,请告诉我它的链接。
我正在尝试将费用函数值发送给我训练过的模型,但它不起作用。
tf.reset_default_graph()
tf.set_random_seed(777)
X = tf.placeholder(tf.float32, [None, 20, 20, 3])
Y = tf.placeholder(tf.float32, [None, 1])
with tf.variable_scope('conv1') as scope:
W1 = tf.Variable(tf.random_normal([4, 4, 3, 32], stddev=0.01), name='weight1')
L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
L1 = tf.reshape(L1, [-1, 10 * 10 * 32])
W1_hist = tf.summary.histogram('conv_weight1', W1)
L1_hist = tf.summary.histogram('conv_layer1', L1)
with tf.name_scope('fully_connected_layer1') as scope:
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1], initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
hypothesis = tf.matmul(L1, W2) + b
W2_hist = tf.summary.histogram('fully_connected_weight1', W2)
b_hist = tf.summary.histogram('fully_connected_bias', b)
hypothesis_hist = tf.summary.histogram('hypothesis', hypothesis)
with tf.name_scope('cost') as scope:
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
cost_summary = tf.summary.scalar('cost', cost)
with tf.name_scope('train_optimizer') as scope:
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
accuracy_summary = tf.summary.scalar('accuracy', accuracy)
train_data_batch, train_labels_batch = tf.train.batch([train_data, train_labels], enqueue_many=True , batch_size=100, allow_smaller_final_batch=True)
with tf.Session() as sess:
# tensorboard --logdir=./logs/planesnet2_log
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter('./logs/planesnet2_log')
writer.add_graph(sess.graph)
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
total_cost = 0
for step in range(20):
x_batch, y_batch = sess.run([train_data_batch, train_labels_batch])
feed_dict = {X: x_batch, Y: y_batch}
_, cost_val = sess.run([optimizer, cost], feed_dict = feed_dict)
total_cost += cost_val
print('total_cost: ', total_cost, 'cost_val: ', cost_val)
coord.request_stop()
coord.join(threads)
答案 0 :(得分:3)
你使用没有sigmoid激活函数的交叉熵损失hypothesis
,因此你的值不受限于] 0,1]。没有为负值定义日志功能,它很可能会得到一些。添加sigmoid和epsilon因子以避免负值或0值,你应该没问题。
答案 1 :(得分:1)
据我所知,
交叉熵成本函数假设您要预测的假设是随机值。因为交叉熵使用log函数和(1-Y_)
公式。因此,交叉熵损失只应用于随机情况。
因此,您必须使用 softmax 函数来生成hypothesis
概率的结果。
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1],
initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
# hypothesis = tf.matmul(L1, W2) + b
hypothesis = tf.nn.softmax(tf.add(tf.matmul(L1, W2), b))
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
或者您可以使用此代码
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1],
initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
hypothesis = tf.matmul(L1, W2) + b
cost = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=hypothesis)