LSTM with a variable batch size

Asked: 2017-10-13 17:13:18

Tags: python machine-learning tensorflow deep-learning lstm

I have built this LSTM class:

import tensorflow as tf
import Constants


class LSTM():

    def __init__(self,
                 inputShape,
                 outputShape,
                 numLayers=Constants.numLayers,
                 numHidden=Constants.numHidden,
                 learningRate=Constants.learningRate,
                 forgetBias=Constants.forgetBias):
        self.inputs = tf.placeholder(tf.float32, [None] + inputShape)
        self.labels = tf.placeholder(tf.float32, [None] + outputShape)
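        # Unstack [batch, time, features] into a time-major list of per-step tensors for static_rnn.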
        self.inputTensors = tf.unstack(self.inputs, axis=1)
        self.weights = tf.Variable(tf.random_normal([numHidden] + outputShape))
        self.bias = tf.Variable(tf.random_normal(outputShape))
        # Build a distinct cell per layer; replicating one cell object with
        # `* numLayers` would make every layer share the same weights.
        layers = [tf.contrib.rnn.LSTMCell(numHidden, forget_bias=forgetBias, state_is_tuple=True)
                  for _ in range(numLayers)]
        self.cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)
        self.optimiser = tf.train.GradientDescentOptimizer(learningRate)
        self.forgetBias = forgetBias
        self.batchDict = None
        self.outputs = None
        self.finalStates = None
        self.predictions = None
        self.loss = None
        self.accuracy = None
        self.optimise = None
        self.session = tf.Session()
        self.__buildGraph()

    def __buildGraph(self):
        # Populate the attributes declared in __init__; self.finalStates is
        # what would have to be carried between batches.
        self.outputs, self.finalStates = tf.nn.static_rnn(self.cell, self.inputTensors, dtype=tf.float32)
        predictions = tf.add(tf.matmul(self.outputs[-1], self.weights), self.bias)
        self.predictions = tf.minimum(tf.maximum(predictions, 0), 1)
        self.loss = tf.losses.mean_squared_error(predictions=self.predictions, labels=self.labels)
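        # Mean absolute error recast as an accuracy, assuming labels (like the clipped predictions) lie in [0, 1].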
        self.accuracy = tf.reduce_mean(1 - tf.abs(self.labels - self.predictions) / 1.0)
        self.optimise = self.optimiser.minimize(self.loss)
        self.session.run(tf.global_variables_initializer())

    def __execute(self, operation):
        return self.session.run(operation, self.batchDict)

    def setBatch(self, inputs, labels):
        self.batchDict = {self.inputs: inputs, self.labels: labels}

    def batchLabels(self):
        return self.__execute(self.labels)

    def batchPredictions(self):
        return self.__execute(self.predictions)

    def batchLoss(self):
        return self.__execute(self.loss)

    def batchAccuracy(self):
        return self.__execute(self.accuracy)

    def processBatch(self):
        self.__execute(self.optimise)

    def kill(self):
        self.session.close()

And I run it like this:

import DataWorker
import Constants
from Model import LSTM

inputShape = [Constants.sequenceLength, DataWorker.numFeatures]
outputShape = [1]

LSTM = LSTM(inputShape, outputShape)

# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
    print("***** EPOCH:", epoch + 1, "*****\n")
    IDPointer, TSPointer = 0, 0
    epochComplete = False
    batchNum = 0
    while not epochComplete:
        batchNum += 1
        batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer)
        LSTM.setBatch(batchX, batchY)
        LSTM.processBatch()
        if batchNum % Constants.printStep == 0 or epochComplete:
            print("Batch:\t\t", batchNum)
            print("Last Pred:\t", LSTM.batchPredictions()[-1][0])
            print("Last Label:\t", LSTM.batchLabels()[-1][0])
            print("Loss:\t\t", LSTM.batchLoss())
            print("Accuracy:\t", str("%.2f" % (LSTM.batchAccuracy() * 100) + "%\n"))

# #############################################
# TESTING
# #############################################
testX, testY = DataWorker.generateTestBatch()
LSTM.setBatch(testX, testY)
testAccuracy = LSTM.batchAccuracy()
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))

LSTM.kill()

This all works well. However, the time-series data I am using consists of financial stocks spanning a range of timestamps far greater than the number of time steps my LSTM is unrolled over, Constants.sequenceLength. Because of this, processing a single stock takes many sequential batches, so the state/memory of my LSTM needs to be passed between batches. On top of that, after a batch that completes an ID's lifespan, the next batch passes in a new ID starting from my dataset's initial timestamp, at which point I want to reset the memory.
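For reference, the usual way to make the state feedable in this TF 1.x setup is to expose the initial state as placeholders and wire them into static_rnn via initial_state, then fetch finalStates on every run. A minimal sketch, assuming the same contrib API as the class above (statePlaceholders is a hypothetical name; numLayers and numHidden stand in for the Constants values):

import tensorflow as tf

numLayers, numHidden = 2, 128  # illustrative stand-ins for the Constants values

# One (c, h) placeholder pair per layer; the batch dimension is left as
# None, so any batch size can be fed at run time.
statePlaceholders = tuple(
    tf.contrib.rnn.LSTMStateTuple(
        c=tf.placeholder(tf.float32, [None, numHidden]),
        h=tf.placeholder(tf.float32, [None, numHidden]))
    for _ in range(numLayers))

# Inside __buildGraph this would replace the implicit zero state:
# self.outputs, self.finalStates = tf.nn.static_rnn(
#     self.cell, self.inputTensors, initial_state=statePlaceholders)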

There are many questions asking something similar, and the answers to all of them seem adequate; however, none of them appear to address the use of a variable batch size, i.e. a batch size initialised as None and then inferred when a batch is passed in. My batches are usually a constant size, but they do change under some circumstances and I cannot change this. How can I control passing the state between batches, as well as resetting the state, if I haven't specified the batch size?
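On the run side, nothing about the batch size has to be baked into the graph: fetch finalStates alongside the train op, feed it back in for the next batch of the same stock, and feed zeros shaped to the current batch whenever a new ID starts. A sketch under the same assumptions as above (zeroState and stateFeed are hypothetical helpers, newStockID a hypothetical flag):

import numpy as np
import tensorflow as tf

def zeroState(numLayers, numHidden, batchSize):
    # Fresh memory, sized to whatever this particular batch happens to be.
    zeros = np.zeros([batchSize, numHidden], dtype=np.float32)
    return [tf.contrib.rnn.LSTMStateTuple(zeros, zeros) for _ in range(numLayers)]

def stateFeed(statePlaceholders, stateValues):
    # Flatten the nested (c, h) structure into ordinary feed_dict entries.
    feed = {}
    for ph, val in zip(statePlaceholders, stateValues):
        feed[ph.c], feed[ph.h] = val.c, val.h
    return feed

# Hypothetical use inside the training loop:
# if newStockID:  # the ID rolled over, so start from fresh memory
#     prevState = zeroState(Constants.numLayers, Constants.numHidden, len(batchX))
# feed = {LSTM.inputs: batchX, LSTM.labels: batchY}
# feed.update(stateFeed(statePlaceholders, prevState))
# _, prevState = LSTM.session.run([LSTM.optimise, LSTM.finalStates], feed)

The one catch is that a carried-over state only matches a batch of the same size, so on the occasions where the batch size does change mid-stock, the state would have to be reset, or sliced/padded to the new size, before being fed back.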

0 Answers:

There are no answers yet.