tensorflow: why are the results of a stacked RNN-LSTM inconsistent?

Date: 2017-10-16 08:31:05

Tags: tensorflow lstm rnn

I am still learning.

I want to build a stacked RNN-LSTM model like this (many inputs, one output):

50~60 (3 input values) -> 0 (answer)

60~70 -> 1

70~80 -> 2

(Example)

51, 52, 57 -> 0  
67, 64, 68 -> 1  
73, 77, 78 -> 2  
53, 55, 51 -> 0   
62, 66, 63 -> 1   
73, 70, 73 -> 2
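
These rows follow the format of the ./data3.csv file that the code below reads: three input values followed by the class label. For reference, a minimal generator for such a file might look like this (the script is my own sketch, not part of the original setup):

import csv
import random

# hypothetical helper that writes rows in the 3-inputs-plus-label format above
ranges = [(50, 60), (60, 70), (70, 80)]  # value ranges for classes 0, 1, 2

with open("data3.csv", "w", newline="") as f:
    writer = csv.writer(f)
    for _ in range(1000):
        label = random.randrange(3)
        low, high = ranges[label]
        writer.writerow([random.randint(low, high - 1) for _ in range(3)] + [label])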

But the results did not come out as I hoped.

The early results were good:

step    input_data        answer    hypothesis        cost (loss)

9995   [[64, 65, 60]]   [[1]]   [[ 1.0044558]]   1.98542e-05
9996   [[70, 71, 77]]   [[2]]   [[ 1.99514413]]   2.35795e-05
9997   [[52, 52, 58]]   [[0]]   [[ 0.00810847]]   6.57474e-05
9998   [[61, 65, 61]]   [[1]]   [[ 1.00867283]]   7.5218e-05
9999   [[79, 72, 71]]   [[2]]   [[ 2.0111196]]   0.000123646
10000   [[56, 55, 58]]   [[0]]   [[-0.00018287]]   3.34404e-08

But after more training, the results no longer converge:

step    input_data        answer    hypothesis        cost (loss)

29995   [[51, 52, 57]]   [[0]]   [[ 1.00436962]]   1.00876
29996   [[67, 64, 68]]   [[1]]   [[ 0.99985003]]   2.24896e-08
29997   [[73, 77, 78]]   [[2]]   [[ 0.9957822]]   1.00845
29998   [[53, 55, 51]]   [[0]]   [[ 1.00436962]]   1.00876
29999   [[62, 66, 63]]   [[1]]   [[ 0.99985003]]   2.24896e-08
30000   [[73, 70, 73]]   [[2]]   [[ 0.9957822]]   1.00845
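
(One thing worth checking for run-to-run inconsistency is unpinned randomness; a minimal TF 1.x sketch of fixing the seeds before the graph is built, which is my own addition and not in the code below:)

import random
import tensorflow as tf

# pin Python-level and graph-level randomness so repeated runs are comparable
random.seed(0)
tf.set_random_seed(0)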

If anyone knows the reason, please let me know.

Environment: Anaconda

Full source code:

import tensorflow as tf
from tensorflow.contrib import rnn

csv_file = "./data3.csv"

batch_size = 1
input_sequence_length = 3 
output_sequence_length = 1
input_num_classes = 80
output_num_classes = 3
learning_rate = 0.1
rnn_stack = 5
hidden_size = 4

with tf.name_scope("placeholder") as scope:

    X = tf.placeholder(tf.int32, [None, input_sequence_length], name="x_input")
    X_one_hot = tf.one_hot(X, input_num_classes)

    Y = tf.placeholder(tf.int32, [None, output_sequence_length], name="y_input")  # 1
    Y_one_hot = tf.one_hot(Y, output_num_classes)  # one hot
    Y_one_hot = tf.reshape(Y_one_hot, [-1, output_num_classes])

    X_one_hot = tf.reshape(X_one_hot, [batch_size, input_sequence_length, input_num_classes])
    outputs = tf.to_float(X_one_hot)


with tf.name_scope("rnn") as scope:

    # Make a lstm cell with hidden_size (each unit output vector size)
    def lstm_cell():
        cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
        return cell

    multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(rnn_stack)], state_is_tuple=True)

    # outputs: unfolding size x hidden size, state = hidden size
    outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, dtype=tf.float32)

    Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], 1, activation_fn=None)


with tf.name_scope("cost") as scope:

    cost = tf.reduce_sum(tf.square(Y_pred - tf.to_float(Y)))

with tf.name_scope("train") as scope:

    train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
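    # (note: learning_rate=0.1 above is much larger than Adam's TensorFlow default of 0.001)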

with tf.name_scope("get_input_data") as scope:

    filename_queue = tf.train.string_input_producer([csv_file])
    key, value = tf.TextLineReader().read(filename_queue)

    input_list = []
    for l in range(input_sequence_length + output_sequence_length):
        input_list.append([1])

    data = tf.decode_csv(value, record_defaults=input_list)


with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    for step in range(1, 30001):
        datas = sess.run(data)

        # first 3 CSV columns are the inputs, the last column is the label
        # (to test the max value instead: dataX.append(datas[0:-2]); dataY.append(datas[-2:-1])
        #  to test the min value instead: dataX.append(datas[0:-2]); dataY.append(datas[-1:]))
        dataX = [datas[0:-1]]
        dataY = [datas[-1:]]

        _, hypo, loss = sess.run([train, Y_pred, cost], feed_dict={X: dataX, Y: dataY})

        print(step, " ", dataX, " ", dataY, " ", hypo, " ", loss)

    coord.request_stop()
    coord.join(threads)
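
Side note on the code above: Y_one_hot is computed but never used, while the cost treats the class index as a regression target. If a 3-class classification head was intended instead, a minimal sketch of a replacement for the Y_pred and cost definitions could look like this (my own variant under that assumption, not the original code):

# hypothetical classification head: logits over output_num_classes instead of one scalar
logits = tf.contrib.layers.fully_connected(outputs[:, -1], output_num_classes, activation_fn=None)

# softmax cross-entropy against the (currently unused) one-hot labels
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y_one_hot, logits=logits))

# predicted class index for each batch element
prediction = tf.argmax(logits, axis=1)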

0 Answers:

No answers yet.