编辑：

Question

我遇到的问题是：一次迭代之后，几乎所有的量（成本函数、权重、假设函数等）都输出 'NaN'。我的代码类似于 TensorFlow 教程 MNIST-Expert（https://www.tensorflow.org/versions/r0.9/tutorials/mnist/pros/index.html）。我已经查找过解决方案，到目前为止尝试过：将学习率降低到接近零以及直接设为零；使用 AdamOptimizer 代替梯度下降；在最后一层使用 sigmoid 函数作为假设函数；以及只使用 numpy 函数。我的输入数据中有一些负值和零值，因此无法用对数交叉熵来代替二次成本函数。以上尝试的结果都一样。我的输入数据是土壤的应力和应变。

import tensorflow as tf
import Datafiles3_pv_complete as soil
import numpy as np

# Number of examples in the training, cross-validation and test subsets,
# followed by the size of the whole data set.
m_training = 18
m_cv = 5
m_test = 5
total_examples = 28

" range for running "
# Consecutive, half-open index ranges: training examples first, then
# cross-validation, then test.
range_training = xrange(m_training)
range_cv = xrange(m_training, m_training + m_cv)
range_test = xrange(m_training + m_cv, total_examples)

# Interactive session; the later `.eval()` / `.run()` calls rely on it being
# the default session.
sess = tf.InteractiveSession()

# Input and output placeholders: 11 input features and 3 target values per row.
x = tf.placeholder(tf.float32, shape=[None, 11])
y_true = tf.placeholder(tf.float32, shape=[None, 3])

# Standard deviation for weight initialisation: 2 / sqrt(number of input
# features), taken from the non-batch dimensions of `x`.
n_inputs = np.prod(x.get_shape().as_list()[1:])
stdev = 2.0 / np.sqrt(n_inputs)

""" Weights and Biases """

def weights(shape):
    """Create a weight variable of the given shape.

    The initial value is drawn with ``tf.truncated_normal`` using the
    module-level ``stdev`` as its standard deviation.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=stdev))

def bias(shape):
    """Create a bias variable of the given shape.

    The initial value is drawn with ``tf.truncated_normal`` using a standard
    deviation of 1.0.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=1.0))

""" Creating weights and biases for all layers """
# Layer sizes are 11 -> 7 -> 7 -> 3; every bias is a single row that is
# broadcast over the rows of the layer input.
theta1, bias1 = weights([11, 7]), bias([1, 7])

theta2, bias2 = weights([7, 7]), bias([1, 7])

# Last layer
theta3, bias3 = weights([7, 3]), bias([1, 3])


""" Hidden layer input (Sum of weights, activation functions and bias)
z = theta^T * activation + bias
"""
def Z_Layer(activation, theta, bias):
    """Return the weighted input of a layer.

    Computes ``matmul(activation, theta) + bias``: the previous layer's
    output multiplied by the weight matrix, plus the bias term.
    """
    weighted = tf.matmul(activation, theta)
    return tf.add(weighted, bias)

""" Creating the sigmoid function 
sigmoid = 1 / (1 + exp(-z))
"""
def Sigmoid(z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    Uses TensorFlow's built-in op instead of composing div/add/exp/neg by
    hand. In the hand-written form ``exp(-z)`` overflows to inf for strongly
    negative ``z`` (below roughly -88 in float32); the forward value is still
    0, but back-propagating through the inf produces 0 * inf = NaN gradients,
    which then turn every parameter into NaN after one update step.
    """
    return tf.sigmoid(z)

""" hypothesis functions - predicted output """    
# Layer 1 - input layer: the activation is the raw input.
hyp1 = x

# Layer 2 - first hidden layer (sigmoid).
z2 = Z_Layer(hyp1, theta1, bias1)
hyp2 = Sigmoid(z2)

# Layer 3 - second hidden layer (sigmoid).
z3 = Z_Layer(hyp2, theta2, bias2)
hyp3 = Sigmoid(z3)

# Layer 4 - output layer: the prediction is the polynomial zL^3 + zL^2 + zL.
zL = Z_Layer(hyp3, theta3, bias3)
zL_cubed = tf.pow(zL, 3)
zL_squared = tf.pow(zL, 2)
hypL = tf.add(tf.add(zL_cubed, zL_squared), zL)


""" Cost function """
# Element-wise quadratic cost: (0.5 / m_training) * (hypL - y_true)^2.
# The result keeps the shape of the prediction; it is not reduced to a scalar.
half_over_m = tf.div(0.5, m_training)
squared_error = tf.pow(tf.sub(hypL, y_true), 2)
cost_function = tf.mul(half_over_m, squared_error)

#cross_entropy = -tf.reduce_sum(y_true*tf.log(hypL) + (1-y_true)*tf.log(1-hypL))

# Gradient descent on the cost above.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.003)
train_step = optimizer.minimize(cost_function)

# Training and evaluation: a row counts as correct when the index of its
# largest predicted value equals the index of its largest target value.
predicted_index = tf.arg_max(hypL, 1)
target_index = tf.arg_max(y_true, 1)
correct_prediction = tf.equal(predicted_index, target_index)

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess.run(tf.initialize_all_variables())

# Only ever supplied through the feed dicts; no op in the code shown here
# consumes it.
keep_prob = tf.placeholder(tf.float32)

""" Testing - Initialise lists  """
# One independent history list per tracked quantity, grouped by kind:
# layer outputs, weighted layer inputs, cost/error, weights, biases.
hyp1_test, hyp2_test, hyp3_test, hypL_test = [], [], [], []
z2_test, z3_test, zL_test = [], [], []
cost_function_test, complete_error_test = [], []
theta1_test, theta2_test, theta3_test = [], [], []
bias1_test, bias2_test, bias3_test = [], [], []
""" -------------------------   """

# Absolute value of the per-row mean of (prediction - target).
complete_error_init = tf.abs(tf.reduce_mean(tf.sub(hypL, y_true), 1))

# Training: one gradient-descent step per index in range_training. Before each
# step the current intermediate values, parameters and accuracy are recorded,
# so every history entry describes the state *before* that step's update.
training_error = []
for j in range_training:
    feedj = {x: soil.input_scale[j], y_true: soil.output_scale[j], keep_prob: 1.0}

    # Testing - adding to list.
    # A single graph evaluation fetches every fed diagnostic, instead of one
    # separate `.eval()` (and forward pass) per tensor.
    (z2_init, hyp2_init, z3_init, hyp3_init, zL_init, hypL_init,
     cost_function_init, complete_error, train_accuracy) = sess.run(
        [z2, hyp2, z3, hyp3, zL, hypL,
         cost_function, complete_error_init, accuracy],
        feed_dict=feedj)

    # The parameters do not depend on the fed data, so no feed dict is needed.
    (theta1_init, theta2_init, theta3_init,
     bias1_init, bias2_init, bias3_init) = sess.run(
        [theta1, theta2, theta3, bias1, bias2, bias3])

    z2_test.append(z2_init)
    hyp2_test.append(hyp2_init)
    z3_test.append(z3_init)
    hyp3_test.append(hyp3_init)
    zL_test.append(zL_init)
    hypL_test.append(hypL_init)
    cost_function_test.append(cost_function_init)
    complete_error_test.append(complete_error)

    theta1_test.append(theta1_init)
    theta2_test.append(theta2_init)
    theta3_test.append(theta3_init)
    bias1_test.append(bias1_init)
    bias2_test.append(bias2_init)
    bias3_test.append(bias3_init)

    # Parenthesised form matches the other print calls in this script and
    # parses under both Python 2 and Python 3.
    print('number iterations: %g, error (S1, S2, S3): %g, %g, %g' % (j, complete_error[0], complete_error[1], complete_error[2]))
    print("step %d, training accuracy %g" % (j, train_accuracy))

    # Parameter update happens last, after everything above was recorded.
    train_step.run(feed_dict=feedj)
    training_error.append(1 - train_accuracy)

# Cross-validation: evaluate accuracy (no parameter update) for every index in
# range_cv and record the error as 1 - accuracy.
cv_error = []
for k in range_cv:
    # This line must be indented with the rest of the loop body; at column 0
    # it is an IndentationError and the script does not parse at all.
    feedk = {x: soil.input_scale[k], y_true: soil.output_scale[k], keep_prob: 1.0}
    cv_accuracy = accuracy.eval(feed_dict=feedk)
    print("cross-validation accuracy %g" % cv_accuracy)
    cv_error.append(1 - cv_accuracy)

# Test pass: print the accuracy for every index in range_test.
# NOTE(review): this reads soil.input_matrixs / soil.output_matrixs, whereas
# the training and cross-validation loops read soil.input_scale /
# soil.output_scale - confirm that the different arrays are intended.
for l in range_test:
    feedl = {x: soil.input_matrixs[l], y_true: soil.output_matrixs[l], keep_prob: 1.0}
    test_accuracy = accuracy.eval(feed_dict=feedl)
    print("test accuracy %g" % test_accuracy)

最近几周我一直在为这个问题搭建一个单元模型（Unit-model），但得到的输出仍然相同。我不知道下一步该尝试什么，希望有人可以帮助我。

编辑：

我再次详细检查了一些参数。第3层和第4层（最后一层）的假设函数（hyp）和激活函数的输入（z，即代码中 Z_Layer 的输出）对每个数据点都有相同的条目，即同一列中每一行的值都相同。

Answer 1

对于您所描述的分类器，1e-3 的学习率仍然相当高。NaN 实际上意味着权重趋向于无穷大，因此我建议尝试更低的学习率，特别是 1e-7 左右。如果仍然发散，就把学习率乘以 0.1，如此重复，直到权重为有限值。

Answer 2

最后，不再出现 NaN 值了。解决办法是对我的输入和输出数据进行缩放（scaling）。结果（准确率）仍然不好，但至少参数得到了实际的数值。在做其他尝试之前我就试过特征缩放（当时可能还有别的错误），并曾认为它对我的问题没有帮助。

TensorFlow 神经网络中出现 NaN 结果

编辑：

2 个答案: