我想为包装在tf.keras.layers.RNN中的tf.keras.layers.StackedRNNCells设置初始状态。但是,总是出现形状不匹配的错误。由于新的keras集成不再提供LSTMStateTuple,因此(据我所知)必须自己构建初始状态。
这是我尝试建立初始状态的地方,因为我尝试修复它,所以现在有点混乱了。
class RNNBasicCell:
    """Base class for the RNN classifiers.

    Holds the shared input/target placeholders, the RNN initial state, and
    the parameters of the dense softmax output layer.

    Attributes set here:
        batchX_placeholder: float32 (batch, timesteps, features) inputs.
        batchY_placeholder: int32 (batch, noutput) one-hot targets.
        init_state:         float32 placeholder holding the whole initial
                            state, shape (num_layers, 2, batch, state_size).
        rnn_state:          the same state unpacked into the structure
                            tf.keras.layers.RNN expects (see below).
    """

    def __init__(self, RNNArg, model_scope):
        with tf.variable_scope(model_scope + 'Model'):
            self.mode_id = model_scope
            self.batchX_placeholder = tf.placeholder(
                tf.float32, shape=(None, RNNArg.timesteps, RNNArg.features))
            self.batchY_placeholder = tf.placeholder(
                tf.int32, shape=(None, RNNArg.noutput))
            self.num_layers = len(RNNArg.hidden_layer_size)
            with tf.variable_scope(model_scope + 'init_state'):
                # BUG FIX: each keras LSTMCell carries TWO state tensors
                # (h and c), each of shape (batch, state_size), so a stack of
                # LSTMCells has state_size [[n, n], [n, n], ...].  The
                # `initial_state` given to tf.keras.layers.RNN must therefore
                # be a flat list [h_0, c_0, h_1, c_1, ...] of 2 * num_layers
                # tensors of shape (batch, state_size) — NOT per-layer tensors
                # of shape (state_size, 2), which caused the reported
                # ValueError (state_spec (20, 2) vs state_size [[20, 20],...]).
                # Feed this placeholder with an array of shape
                # (num_layers, 2, batch, state_size); all-zeros gives the
                # usual zero initial state.
                self.init_state = tf.placeholder(
                    dtype=tf.float32,
                    shape=[self.num_layers, 2, None, RNNArg.state_size])
                per_layer_states = tf.unstack(self.init_state, axis=0)
                # Flatten [(h, c), (h, c), ...] into [h_0, c_0, h_1, c_1, ...].
                self.rnn_state = [
                    state
                    for layer_state in per_layer_states
                    for state in tf.unstack(layer_state, axis=0)
                ]
            self.labels = tf.reshape(self.batchY_placeholder, [-1])
            # Dense output layer: (state_size, noutput) weights plus bias.
            self.weight = tf.Variable(
                np.random.rand(RNNArg.state_size, RNNArg.noutput),
                dtype=tf.float32)
            self.bias = tf.Variable(
                np.zeros((1, RNNArg.noutput)), dtype=tf.float32)
这是我构建LSTM网络的地方。
class MultiLayerLSTM_Classificator(RNNBasicCell):
    """Multi-layer LSTM classifier: stacked keras LSTM cells feeding a dense
    softmax output layer, trained with Adam on sparse cross-entropy.

    NOTE(review): `tf.keras.layers.RNN` over `StackedRNNCells` of LSTMCells
    requires `initial_state` to be a flat list of 2 * num_layers tensors of
    shape (batch, state_size) — one (h, c) pair per layer.  The reported
    ValueError means the `self.rnn_state` passed below does not match that
    structure; the fix belongs where `rnn_state` is built.
    """

    def __init__(self, RNNARg, model_scope):
        RNNBasicCell.__init__(self, RNNARg, model_scope)
        with tf.variable_scope(model_scope + 'Model'):
            # Wrap the existing placeholder as a keras Input tensor.
            inputs = tf.keras.layers.Input(shape=(RNNARg.timesteps, RNNARg.features), tensor=self.batchX_placeholder)
            # create a lstm layer list
            self.rnn_layers = []
            self.keepProb = tf.Variable(RNNARg.dropoutKeepProb)
            self.noutput = tf.Variable(RNNARg.noutput)
            self.dropout = tf.Variable(RNNARg.dropout)
            for _ in RNNARg.hidden_layer_size:
                self.cell = tf.keras.layers.LSTMCell(RNNARg.state_size)
                # dropout == 1 is used as an on/off flag here, not a rate.
                if RNNARg.dropout == 1:
                    # NOTE(review): wrapping a keras LSTMCell in the TF1
                    # tf.nn.rnn_cell.DropoutWrapper mixes the two RNN APIs —
                    # confirm this combination actually works in your TF version.
                    self.cell = tf.nn.rnn_cell.DropoutWrapper(self.cell, output_keep_prob=self.keepProb, variational_recurrent=True, dtype=tf.float32)
                self.rnn_layers.append(self.cell)
            self.cell = tf.keras.layers.StackedRNNCells(self.rnn_layers)
            # create Model
            # With return_state=True the call returns [output, state_1, state_2, ...].
            self.rnn = tf.keras.layers.RNN(self.cell, return_state=True)(inputs, initial_state=self.rnn_state)
            self.state_series = self.rnn[0]    # last output, (batch, state_size)
            self.current_state = self.rnn[1:]  # final per-layer states
            # NOTE(review): this reshape only works when the concatenated
            # states contain exactly num_layers * 2 * state_size elements,
            # i.e. it assumes batch_size == 1 — confirm.
            self.current_state = tf.reshape(tf.concat([states for states in self.current_state], 0), [self.num_layers, 2, RNNARg.state_size])
            self.state_series = tf.reshape(self.state_series, [-1, RNNARg.state_size])
            # create ouput layer with softmax
            self.output = tf.matmul(self.state_series, self.weight) + self.bias
            # Dense class indices from the one-hot targets, shape (batch_size,).
            self.labels = tf.reshape(tf.argmax(self.batchY_placeholder, axis=1), [RNNARg.batch_size])
            # calculate loss
            self.residuals = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.output, labels=self.labels)
            self.loss = tf.reduce_mean(self.residuals)
            self.train_steps = tf.train.AdamOptimizer(learning_rate=RNNARg.lr).minimize(self.loss)
这是初始化网络时出现的错误:
ValueError: An `initial_state` was passed that is not compatible with `cell.state_size`. Received `state_spec`=[InputSpec(shape=(20, 2), ndim=2), InputSpec(shape=(20, 2), ndim=2), InputSpec(shape=(20, 2), ndim=2)]; however `cell.state_size` is [[20, 20], [20, 20], [20, 20]]
任何帮助将不胜感激。