使用lstm tensorflow进行序列预测

时间:2018-04-30 08:57:19

标签: python tensorflow time-series lstm prediction

我是Tensorflow的新手,所以请耐心等待。

我想为序列预测创建一个LSTM rnn,但由于我也将使用它来调整超参数,我希望输入为格式 [N1,N2,N3 ...] 和输出将是一个数字。

例如:输入[1,2,3,4] |输出= [5]

我创建了一个这样的字典: dict = {"n1": array(1,2,3,4,5), "n2": array(2,3,4,5,6), "n3": array(3,4,5,6,7), "n4": array(4,5,6,7,8)}

标签/输出= [5,6,7,8,9]

我想使用估算器,我将使用DynamicRnnEstimator()但它将在未来版本中删除,所以我想为此创建我自己的自定义估算器。

我想知道如何为训练模式的特定场景创建自己的自定义估算器,以预测给定序列“sequence_length”的下一个数字。

我有很多训练序列。

我到目前为止编写的代码如下:

# START

# IMPORTS

import numpy as np
import tensorflow as tf
import csv
from tensorflow.contrib.learn import DynamicRnnEstimator
from tensorflow.python import debug as tf_debug

# GLOBAL VARIABLES (module-level hyperparameters shared by the functions below)
sequence_length = 4 # window length: predict the next number given the previous sequence_length numbers
batch_size = 10 # examples per training batch (used by train_input_fn)
n_epochs = 100 # NOTE(review): declared but never used anywhere in this file
n_neurons = 5 # NOTE(review): declared but never used anywhere in this file

# NORMALIZING THE DATA
def normalize_data(train, test):
    """Min-max scale two numeric sequences independently to [0, 1].

    Args:
        train: non-empty sequence of numbers (training series).
        test: non-empty sequence of numbers (test series).

    Returns:
        (normalized_train, normalized_test): lists of floats, each series
        scaled by its OWN min and max.
    """
    train_max = float(max(train))
    train_min = float(min(train))
    normalized_train = [(x - train_min) / (train_max - train_min) for x in train]
    test_max = float(max(test))
    # BUG FIX: the original used min(train) here, so the test series was
    # normalized with a statistic from the training series.
    test_min = float(min(test))
    normalized_test = [(x - test_min) / (test_max - test_min) for x in test]
    return normalized_train, normalized_test

# LOADING THE DATA
def load_data(train, test):
    """Load train/test series from CSV files and build sliding-window samples.

    Each CSV row is expected to carry the numeric value in column index 1.

    Args:
        train: path of the training CSV file.
        test: path of the test CSV file.

    Returns:
        trainx, testx: lists of windows of `sequence_length` consecutive
            normalized values.
        trainy, testy: for each window, the normalized value immediately
            following it (same length as the corresponding x list).
    """
    with open(train, 'r') as csvfile1:
        reader1 = csv.reader(csvfile1, delimiter=',')
        train_vals = [float(row[1]) for row in reader1]
    with open(test, 'r') as csvfile2:
        reader2 = csv.reader(csvfile2, delimiter=',')
        test_vals = [float(row[1]) for row in reader2]
    normalized_train, normalized_test = normalize_data(train_vals, test_vals)
    global sequence_length
    # BUG FIX: the original ranges produced x and y lists of different
    # lengths, and each label was offset one step past the value that
    # actually follows its window. Window i is [i, i+sequence_length);
    # its label is the element at index i+sequence_length.
    n_train_windows = len(normalized_train) - sequence_length
    n_test_windows = len(normalized_test) - sequence_length
    trainx = [normalized_train[i:i + sequence_length] for i in range(n_train_windows)]
    testx = [normalized_test[i:i + sequence_length] for i in range(n_test_windows)]
    trainy = [normalized_train[i + sequence_length] for i in range(n_train_windows)]
    testy = [normalized_test[i + sequence_length] for i in range(n_test_windows)]
    return trainx, testx, trainy, testy

# Create Dict function
def create_dict(x,y):
    feature_dict = dict()
    global sequence_length
    for i in range(sequence_length):
        temp = "number"+str(i+1)
        feature_dict[temp] = x[:,i]
    labels = y
    return feature_dict, labels

# Training input function
def train_input_fn(features, labels, batch_size):
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    # Shuffle, repeat, and batch the examples.
    dataset = dataset.shuffle(100).repeat().batch(batch_size)
    # Return the read end of the pipeline.
    return dataset.make_one_shot_iterator().get_next()

# Create feature columns
def create_feature_columns():
    feature_column = []
    for i in range(sequence_length):
        feature_column.append(tf.feature_column.numeric_column(key="number"+str(i+1)))
    return feature_column

# Model_ function
def my_model_fn(features, labels, mode, params):
     net = tf.feature_column.input_layer(features, params['feature_columns'])
     for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
     logits = tf.layers.dense(net, params['n_classes'], activation=None)

def main():
    """Entry point: load the data and build feature dicts and columns.

    NOTE(review): no estimator is ever constructed or trained here; wiring
    my_model_fn into tf.estimator.Estimator and calling train/evaluate
    remains to be done (as the surrounding question states).
    """
    # `global` removed: these module-level names are only read, never
    # reassigned, so the declaration was unnecessary. Unused locals
    # n_train/n_test were dropped.
    trainx, testx, trainy, testy = load_data("train.csv", "test.csv")
    trainx, testx = np.array(trainx), np.array(testx)
    trainy, testy = np.array(trainy), np.array(testy)
    feature_dict_train, label_train = create_dict(trainx, trainy)
    feature_dict_test, label_test = create_dict(testx, testy)
    feature_column = create_feature_columns()


if __name__ == "__main__":
    # Guard the entry point so importing this module has no side effects.
    main()

提前感谢!

0 个答案:

没有答案