我已经构建了这个LSTM类:
import tensorflow as tf
import Constants
class LSTM():
def __init__(self,
inputShape,
outputShape,
numLayers=Constants.numLayers,
numHidden=Constants.numHidden,
learningRate=Constants.learningRate,
forgetBias=Constants.forgetBias):
self.inputs = tf.placeholder(tf.float32, [None] + inputShape)
self.labels = tf.placeholder(tf.float32, [None] + outputShape)
self.inputTensors = tf.unstack(self.inputs, axis=1)
self.weights = tf.Variable(tf.random_normal([numHidden] + outputShape))
self.bias = tf.Variable(tf.random_normal(outputShape))
layers = [tf.contrib.rnn.LSTMCell(numHidden, forget_bias=forgetBias, state_is_tuple=True)] * numLayers
self.cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)
self.optimiser = tf.train.GradientDescentOptimizer(learningRate)
self.forgetBias = forgetBias
self.batchDict = None
self.outputs = None
self.finalStates = None
self.predictions = None
self.loss = None
self.accuracy = None
self.optimise = None
self.session = tf.Session()
self.__buildGraph()
def __buildGraph(self):
outputs, finalStates = tf.nn.static_rnn(self.cell, self.inputTensors, dtype=tf.float32)
predictions = tf.add(tf.matmul(outputs[-1], self.weights), self.bias)
self.predictions = tf.minimum(tf.maximum(predictions, 0), 1)
self.loss = tf.losses.mean_squared_error(predictions=self.predictions, labels=self.labels)
self.accuracy = tf.reduce_mean(1 - tf.abs(self.labels - self.predictions) / 1.0)
self.optimise = self.optimiser.minimize(self.loss)
self.session.run(tf.global_variables_initializer())
def __execute(self, operation):
return self.session.run(operation, self.batchDict)
def setBatch(self, inputs, labels):
self.batchDict = {self.inputs: inputs, self.labels: labels}
def batchLabels(self):
return self.__execute(self.labels)
def batchPredictions(self):
return self.__execute(self.predictions)
def batchLoss(self):
return self.__execute(self.loss)
def batchAccuracy(self):
return self.__execute(self.accuracy)
def processBatch(self):
self.__execute(self.optimise)
def kill(self):
self.session.close()
我这样运行:
import DataWorker
import Constants
from Model import LSTM
inputShape = [Constants.sequenceLength, DataWorker.numFeatures]
outputShape = [1]
LSTM = LSTM(inputShape, outputShape)
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer)
LSTM.setBatch(batchX, batchY)
LSTM.processBatch()
if batchNum % Constants.printStep == 0 or epochComplete:
print("Batch:\t\t", batchNum)
print("Last Pred:\t", LSTM.batchPredictions()[-1][0])
print("Last Label:\t", LSTM.batchLabels()[-1][0])
print("Loss:\t\t", LSTM.batchLoss())
print("Accuracy:\t", str("%.2f" % (LSTM.batchAccuracy() * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY = DataWorker.generateTestBatch()
LSTM.setBatchDict(testX, testY)
testAccuracy = LSTM.batchAccuracy()
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
LSTM.kill()
这一切都很有效。但是,我使用的时间序列数据包括跨越时间戳范围的金融股,远远大于我的LSTM展开的时间步数 - Constants.sequenceLength
。因此,处理单个库存需要许多连续批次,因此我的LSTM的状态/内存需要在批次之间传递。除此之外,在完成ID生命周期的批处理之后,下一批将从我的数据集的初始时间戳传入一个新ID,因此我想重置内存。
There are many questions asking something similar, and all of the answers are adequate但是,似乎都没有解决使用变量批量大小的问题 - 批量大小初始化为None
,然后在批量传入时推断。我的批次通常是一个恒定的大小,但在某些情况下确实会改变,我无法改变这一点。如果我没有指定批量大小,如何控制批次之间的状态传递以及重置状态?