I am studying RNNs.
I want to build an RNN-LSTM model with many inputs and one output, like this (a script for generating such data is sketched after the examples below):

50~60 (3 input values) -> 0 (answer)
60~70 -> 1
70~80 -> 2

(Examples)
51, 52, 57 -> 0
67, 64, 68 -> 1
73, 77, 78 -> 2
53, 55, 51 -> 0
62, 66, 63 -> 1
73, 70, 73 -> 2
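For reproduction, data like this can be written to data3.csv with a small script such as the one below. This is only a sketch: it assumes each row holds three integers drawn from one range followed by the answer (which is the layout the reader code expects); the exact distribution and the 1000-row count are arbitrary.

    # Hypothetical generator for data3.csv: three random integers from one
    # range per row, followed by the matching answer (0, 1, or 2).
    import csv
    import random

    ranges = [(50, 59, 0), (60, 69, 1), (70, 79, 2)]  # (low, high, answer)
    with open("data3.csv", "w", newline="") as f:
        writer = csv.writer(f)
        for _ in range(1000):  # row count is arbitrary
            lo, hi, answer = random.choice(ranges)
            writer.writerow([random.randint(lo, hi) for _ in range(3)] + [answer])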
But the results did not turn out as I expected.
The early results were good:

step    input_data        answer    hypothesis        cost (loss)
9995 [[64, 65, 60]] [[1]] [[ 1.0044558]] 1.98542e-05
9996 [[70, 71, 77]] [[2]] [[ 1.99514413]] 2.35795e-05
9997 [[52, 52, 58]] [[0]] [[ 0.00810847]] 6.57474e-05
9998 [[61, 65, 61]] [[1]] [[ 1.00867283]] 7.5218e-05
9999 [[79, 72, 71]] [[2]] [[ 2.0111196]] 0.000123646
10000 [[56, 55, 58]] [[0]] [[-0.00018287]] 3.34404e-08
But after more training, the results no longer converge:

step    input_data        answer    hypothesis        cost (loss)
29995 [[51, 52, 57]] [[0]] [[ 1.00436962]] 1.00876
29996 [[67, 64, 68]] [[1]] [[ 0.99985003]] 2.24896e-08
29997 [[73, 77, 78]] [[2]] [[ 0.9957822]] 1.00845
29998 [[53, 55, 51]] [[0]] [[ 1.00436962]] 1.00876
29999 [[62, 66, 63]] [[1]] [[ 0.99985003]] 2.24896e-08
30000 [[73, 70, 73]] [[2]] [[ 0.9957822]] 1.00845
If anyone knows the reason, please let me know.

Environment: Anaconda

Full source code:
import tensorflow as tf
from tensorflow.contrib import rnn

csv_file = "./data3.csv"

# Hyperparameters
batch_size = 1
input_sequence_length = 3    # three input values per example
output_sequence_length = 1   # one answer per example
input_num_classes = 80       # input values are one-hot encoded over 0..79
output_num_classes = 3       # answers are 0, 1, or 2
learning_rate = 0.1
rnn_stack = 5                # number of stacked LSTM layers
hidden_size = 4              # output vector size of each LSTM cell

with tf.name_scope("placeholder") as scope:
    X = tf.placeholder(tf.int32, [None, input_sequence_length], name="x_input")
    X_one_hot = tf.one_hot(X, input_num_classes)
    Y = tf.placeholder(tf.int32, [None, output_sequence_length], name="y_input")
    Y_one_hot = tf.one_hot(Y, output_num_classes)  # not used below; the cost works on Y directly
    Y_one_hot = tf.reshape(Y_one_hot, [-1, output_num_classes])
    X_one_hot = tf.reshape(X_one_hot, [batch_size, input_sequence_length, input_num_classes])
    outputs = tf.to_float(X_one_hot)  # unused; overwritten by dynamic_rnn below

with tf.name_scope("rnn") as scope:
    # Make an LSTM cell with hidden_size units (each cell's output vector size)
    def lstm_cell():
        return rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)

    multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(rnn_stack)],
                                   state_is_tuple=True)
    # outputs: [batch_size, input_sequence_length, hidden_size]
    outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, dtype=tf.float32)
    # Take only the last time step's output and project it to a single scalar
    Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], 1, activation_fn=None)

with tf.name_scope("cost") as scope:
    # Squared error between the scalar prediction and the integer answer
    cost = tf.reduce_sum(tf.square(Y_pred - tf.to_float(Y)))

with tf.name_scope("train") as scope:
    train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.name_scope("get_input_data") as scope:
    # Read one CSV row at a time: 3 input values followed by 1 answer
    filename_queue = tf.train.string_input_producer([csv_file])
    key, value = tf.TextLineReader().read(filename_queue)
    record_defaults = [[1] for _ in range(input_sequence_length + output_sequence_length)]
    data = tf.decode_csv(value, record_defaults=record_defaults)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    step = 0
    while step < 30000:
        step += 1
        datas = sess.run(data)
        dataX = []
        dataY = []
        # To test the max value: dataX.append(datas[0:-2]); dataY.append(datas[-2:-1])
        # To test the min value: dataX.append(datas[0:-2]); dataY.append(datas[-1:])
        dataX.append(datas[0:-1])  # first three values are the inputs
        dataY.append(datas[-1:])   # last value is the answer
        _, hypo, loss = sess.run([train, Y_pred, cost],
                                 feed_dict={X: dataX, Y: dataY})
        print(step, " ", dataX, " ", dataY, " ", hypo, " ", loss)

    coord.request_stop()
    coord.join(threads)
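As a side note, a single hand-made example can be fed through the same graph as a quick sanity check on the feed shapes. This sketch assumes the graph definitions above are in scope and reuses the first example row (51, 52, 57 -> 0) from above; it is not part of the training run.

    # Quick sanity check on one hand-made example (the graph above must be built)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        hypo, loss = sess.run([Y_pred, cost],
                              feed_dict={X: [[51, 52, 57]],  # shape [1, 3]
                                         Y: [[0]]})          # shape [1, 1]
        print("hypothesis:", hypo, "cost:", loss)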