我有少量数据,包含8个要素和5个值,可通过回归进行预测。示例数据显示在问题的末尾。
当我运行深度网络的训练时,经过大约400次迭代后,我得到权重的NaN值,而成本函数也导致NaN值,这意味着训练不起作用。为什么是这样?我对TensorFlow很新,我觉得有一些明显的遗漏,可能在成本计算中:
#!/usr/bin/env python
"""
usage:
program [options]
options:
-h, --help display help message
--version display version and exit
"""
import docopt
import subprocess
import numpy as np
import tensorflow as tf
def main(options):
# configuration
number_classes = 5
epochs = 10000001
learning_rate = 0.1
logs_path = "/tmp/run"
tf.reset_default_graph()
# TensorBoard
subprocess.Popen(["killall tensorboard"], shell = True)
subprocess.Popen(["rm -rf /tmp/run"], shell = True)
subprocess.Popen(["tensorboard --logdir=/tmp/run"], shell = True)
subprocess.Popen(["xdg-open http://127.0.1.1:6006"], shell = True)
data = np.loadtxt(
"data.csv",
skiprows = 1,
delimiter = ",",
dtype = np.float32
)
x_data = data[:, 0:- number_classes]
y_data = data[:, number_classes + 3:]
with tf.name_scope("input"):
X = tf.placeholder(tf.float32, [None, x_data.shape[1]])
Y = tf.placeholder(tf.float32, [None, y_data.shape[1]])
tf.summary.histogram("input", X)
with tf.name_scope("architecture"):
W1 = tf.Variable(tf.random_normal([x_data.shape[1], 50]), name = "weight1")
b1 = tf.Variable(tf.random_normal([50]), name = "bias1" )
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)
W2 = tf.Variable(tf.random_normal([50, 50]), name = "weight2")
b2 = tf.Variable(tf.random_normal([50]), name = "bias2" )
layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)
W3 = tf.Variable(tf.random_normal([50, 50]), name = "weight3")
b3 = tf.Variable(tf.random_normal([50]), name = "bias3" )
layer3 = tf.sigmoid(tf.matmul(layer2, W3) + b3)
W4 = tf.Variable(tf.random_normal([50, y_data.shape[1]]), name = "weight4")
b4 = tf.Variable(tf.random_normal([y_data.shape[1]]), name = "bias4" )
hypothesis = tf.sigmoid(tf.matmul(layer3, W4) + b4)
#tf.summary.histogram("W1", W1)
#tf.summary.histogram("b1", b1)
#tf.summary.histogram("layer1", layer1)
#tf.summary.histogram("W2", W2)
#tf.summary.histogram("b2", b2)
#tf.summary.histogram("layer2", layer2)
#tf.summary.histogram("W3", W3)
#tf.summary.histogram("b3", b3)
#tf.summary.histogram("layer3", layer3)
#tf.summary.histogram("W4", W4)
#tf.summary.histogram("b4", b4)
#tf.summary.histogram("hypothesis", hypothesis)
with tf.name_scope("cost"):
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
tf.summary.scalar("cost", cost)
with tf.name_scope("accuracy"):
# accuracy computation: true if hypothesis > 0.5 else false
predicted = tf.cast(hypothesis > 0.5, dtype = tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype = tf.float32))
tf.summary.scalar("accuracy", accuracy)
summary_operation = tf.summary.merge_all()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(logs_path)
for step in range(epochs):
_, summary = sess.run([train, summary_operation], feed_dict = {X: x_data, Y: y_data})
writer.add_summary(summary, step)
if step % 2000 == 0:
print("\nstep: {step}\ncost: {cost}".format(
step = step,
cost = sess.run(cost, feed_dict = {X: x_data, Y: y_data})
))
print("\naccuracy report:")
h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict = {X: x_data, Y: y_data})
print("\nhypothesis:\n\n{hypothesis}\n\ncorrect (Y):\n\n{correct}\n\naccuracy: {accuracy}".format(
hypothesis = h,
correct = c,
accuracy = a
))
subprocess.Popen(["killall tensorboard"], shell = True)
if __name__ == "__main__":
options = docopt.docopt(__doc__)
if options["--version"]:
print(version)
exit()
main(options)
示例数据(data.csv
)如下:
i1,i2,i3,i4,i5,i6,i7,i8,o1,o2,o3,o4,o5
-1,-0.5352926315,-0.4935420352,-1,-0.4944026038,-0.253963208,-1,-0.8880478088,0.8546009151,0.4774468085,-0.062295082,-0.6523892052,-0.4402645721
-0.5522903811,-0.7816838836,-0.7696659213,-0.2629286881,-0.3547593658,-1,0.089373879,-0.4944223108,-0.8190137265,-0.1268085106,-0.3398907104,-0.760336717,-0.6254650682
-0.0890453721,-0.7323068451,-0.7808130027,1,-1,0.1459624163,-0.0262982891,1,0.4112862227,0.8110638298,0.5038251366,0.6484278287,0.8983050847
-0.4235644283,1,1,-0.7711244178,1,0.577533724,0.3119722709,-0.9884462151,-1,-1,-1,-0.9767269126,-1
0.8491470054,-0.2859093477,-0.3093481961,0.3165184782,-0.1465875018,-0.2885239412,-0.5480321625,-0.7645418327,0.649211998,0.130212766,-0.0754098361,-0.7865808368,-0.6494419181
1,-0.2139813608,-0.2666844664,0.8739490716,-0.85062216,1,0.4008423513,-0.1756972112,0.6807320793,0.050212766,1,1,1
-0.2383303085,-1,-1,0.0974414807,-0.824866335,-0.7023770251,-0.1418898495,-0.2003984064,0.8403660397,0.1234042553,0.2480874317,-0.3265659817,-0.2029764365
-0.4014083485,0.050288274,0.0192515213,0.3188169116,-0.3785831324,0.7963543885,-0.7363722467,-1,0.3523131673,1,0.1781420765,-1,-0.9859446052
0.2686170599,-0.1839134298,-0.2154657411,0.3874070042,-0.7997421445,0.9813940655,1,0.1442231076,1,0.8144680851,0.3333333333,0.1562267888,0.2740801984
0.8191506352,-0.6580510916,-0.6434992046,-0.3727696123,-0.7772633879,-0.056711512,0.2537734518,0.9589641434,0.6573462125,-0.3446808511,-0.0207650273,0.1606833375,0.2740801984