I am trying to implement logistic regression with 9 different target classes. While debugging I get
Epoch: 0025 cost= nan
This is what one batch looks like:
batch_xs
[[ 3.40000000e+01 3.34000000e+01 9.00000000e-02 3.40000000e+01
4.06858908e+00 0.00000000e+00 3.30000000e+01 4.04000000e+01
1.00000000e-02 3.30000000e+01 4.06858908e+00 1.00000000e+00
3.20000000e+01 4.22000000e+01 7.00000000e-01 3.20000000e+01
4.06858908e+00 2.00000000e+00 3.10000000e+01 4.18000000e+01
5.00000000e-01 3.10000000e+01 4.06858908e+00 3.00000000e+00
3.00000000e+01 3.70000000e+01 0.00000000e+00 3.00000000e+01
4.06858908e+00 4.00000000e+00 2.90000000e+01 3.78000000e+01
2.00000000e-02 2.90000000e+01 4.06858908e+00 5.00000000e+00
2.80000000e+01 3.78000000e+01 2.00000000e-02 2.90000000e+01
4.06858908e+00 6.00000000e+00 4.90000000e+01 4.00000000e+00
1.00000000e+00]]
batch_ys:
[[0 0 0 1 0 0 0 0 0]]
The original y, however, is [[3]]; I use convert_y to turn it into a (_, 9) one-hot matrix.
Here is some of the code I am using:
import numpy as np
import tensorflow as tf  # TF 1.x-style API (placeholders/sessions)

learning_rate = 0.01
training_epochs = 25
batch_size = 1
display_step = 1

x = tf.placeholder(tf.float32, [None, feature_column_count])
y = tf.placeholder(tf.float32, [None, n_target_classes])
W = tf.Variable(tf.zeros([feature_column_count, n_target_classes]))
b = tf.Variable(tf.zeros([n_target_classes]))

pred = tf.nn.softmax(tf.matmul(x, W) + b)
# tf.log(pred) becomes -inf as soon as pred underflows to 0 (the unnormalized
# features push the logits there quickly), and 0 * -inf then yields NaN
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
def next_batch(num, data, labels):
    # note: idx is never shuffled or advanced, so this always
    # returns the same first `num` rows of the data
    idx = np.arange(0, len(data))
    idx = idx[:num]
    data_s = data[idx]
    labels_s = labels[idx]
    return np.asarray(data_s), np.asarray(labels_s)
def convert_y(size, n_classes, y):
    # one-hot encode: row a gets a 1 in column y[a]
    yk = np.zeros((size, n_classes), dtype=np.int)
    for a in range(len(y)):
        yk[a, y[a]] = 1
    return yk
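# quick check (illustrative call): the [[3]] label shown above becomes
# the one-hot row from the batch printout
# >>> convert_y(1, 9, [3])
# array([[0, 0, 0, 1, 0, 0, 0, 0, 0]])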
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(np.shape(TRAINING_SET.data)[0] / batch_size)
        for i in range(total_batch):
            features = TRAINING_SET.data
            target = TRAINING_SET.target
            batch_xs, batch_ys = next_batch(batch_size, features, target)
            batch_ys = convert_y(len(batch_ys), n_target_classes, batch_ys)
            print(batch_xs)
            print(batch_ys)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, y: batch_ys})
            avg_cost += c / total_batch
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Test Accuracy:", accuracy.eval({x: TRAINING_SET.data, y: convert_y(len(TRAINING_SET.target), n_target_classes, TRAINING_SET.target)}))
    print("Validation Accuracy:", accuracy.eval({x: VALIDATION_SET.data, y: convert_y(len(VALIDATION_SET.target), n_target_classes, VALIDATION_SET.target)}))
Does anyone know what the problem with this code is?
Answer 0 (score: 0)
For regression it is better to use a mean-square loss. You can try the following approach; gradient clipping also helps:
logits = tf.nn.relu(tf.matmul(x, W) + b)
cost = tf.reduce_mean(tf.square(tf.subtract(y, logits)))

learning_rate = 0.01
momentum = 0.9
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum, use_nesterov=True)

# clip each gradient to a maximum L2 norm of 10 before applying it
grads_vars = optimizer.compute_gradients(cost)
clipped_grads_vars = []
for grad, var in grads_vars:
    grad = tf.clip_by_norm(grad, clip_norm=10.0)
    clipped_grads_vars.append((grad, var))
train_op = optimizer.apply_gradients(clipped_grads_vars)
....
_, c = sess.run([train_op, cost], feed_dict={x: batch_xs, y: batch_ys})
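Note that the batch shown in the question uses one-hot labels, so this looks like 9-class classification rather than regression. If that is the case, a numerically stable cross-entropy sidesteps the NaN directly, because tf.nn.softmax_cross_entropy_with_logits operates on the raw logits and never evaluates an explicit tf.log(pred). A minimal sketch, reusing x, y, W, b, and learning_rate from the question:

logits = tf.matmul(x, W) + b  # raw scores; no softmax here
# softmax and cross-entropy fused into one numerically stable op
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

Normalizing the input features (e.g., to zero mean and unit variance) would also keep the logits from blowing up, given the raw values in the 30-40 range shown in the batch above.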