My recurrent batch-normalization LSTM cell class is as follows:
import tensorflow as tf
from tensorflow.python.ops.rnn_cell_impl import RNNCell, LSTMStateTuple

# orthogonal_initializer and bn_lstm_identity_initializer are custom
# initializers defined elsewhere in my code.

class BNLSTMCell(RNNCell):
    """LSTM cell that batch-normalizes the input, state and cell transforms."""

    def __init__(self, num_units, is_training=True, use_peepholes=False,
                 cell_clip=None, initializer=None, num_proj=None,
                 proj_clip=None, forget_bias=1.0, state_is_tuple=True,
                 activation=tf.tanh, reuse=None):
        super(BNLSTMCell, self).__init__(_reuse=reuse)
        if not state_is_tuple:
            tf.logging.log_first_n(
                tf.logging.WARN,
                '%s: Using a concatenated state is slower and will soon be '
                'deprecated. Use state_is_tuple=True.', 1, self)
        self.num_units = num_units
        self.is_training = is_training
        self.use_peepholes = use_peepholes
        self.cell_clip = cell_clip
        self.num_proj = num_proj
        self.proj_clip = proj_clip
        self.initializer = initializer
        self.forget_bias = forget_bias
        self.state_is_tuple = state_is_tuple
        self.activation = activation

        if num_proj:
            self._state_size = (LSTMStateTuple(num_units, num_proj)
                                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (LSTMStateTuple(num_units, num_units)
                                if state_is_tuple else 2 * num_units)
            self._output_size = num_units

    @property
    def state_size(self):
        return self._state_size

    @property
    def output_size(self):
        return self._output_size

    def call(self, inputs, state):
        num_proj = self.num_units if self.num_proj is None else self.num_proj

        if self.state_is_tuple:
            (c_prev, h_prev) = state
        else:
            c_prev = tf.slice(state, [0, 0], [-1, self.num_units])
            h_prev = tf.slice(state, [0, self.num_units], [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                'Could not infer input size from inputs.get_shape()[-1]')

        scope = tf.get_variable_scope()
        with tf.variable_scope(scope or type(self).__name__):
            W_xh = tf.get_variable(
                'input_kernel', [input_size, 4 * self.num_units],
                initializer=orthogonal_initializer())
            W_hh = tf.get_variable(
                'state_kernel', [num_proj, 4 * self.num_units],
                initializer=bn_lstm_identity_initializer(0.95))

            # batch-normalize the input-to-hidden and hidden-to-hidden
            # transformations separately, before adding the bias
            xh = tf.matmul(inputs, W_xh)
            hh = tf.matmul(h_prev, W_hh)
            bn_xh = batch_norm(xh, 'input', self.is_training)
            bn_hh = batch_norm(hh, 'state', self.is_training)

            bias = tf.get_variable('bias', [4 * self.num_units])
            # i: input gate, j: new input, f: forget gate, o: output gate
            lstm_matrix = tf.nn.bias_add(tf.add(bn_xh, bn_hh), bias)
            i, j, f, o = tf.split(value=lstm_matrix,
                                  num_or_size_splits=4, axis=1)

            # diagonal (peephole) connections
            if self.use_peepholes:
                w_f_diag = tf.get_variable('W_F_diag',
                                           shape=[self.num_units], dtype=dtype)
                w_i_diag = tf.get_variable('W_I_diag',
                                           shape=[self.num_units], dtype=dtype)
                w_o_diag = tf.get_variable('W_O_diag',
                                           shape=[self.num_units], dtype=dtype)

            if self.use_peepholes:
                c = (c_prev * tf.sigmoid(f + self.forget_bias
                                         + w_f_diag * c_prev)
                     + tf.sigmoid(i + w_i_diag * c_prev) * self.activation(j))
            else:
                c = (c_prev * tf.sigmoid(f + self.forget_bias)
                     + tf.sigmoid(i) * self.activation(j))

            if self.cell_clip is not None:
                c = tf.clip_by_value(c, -self.cell_clip, self.cell_clip)

            # batch-normalize the new cell state before computing the output
            bn_c = batch_norm(c, 'cell', self.is_training)

            if self.use_peepholes:
                h = tf.sigmoid(o + w_o_diag * c) * self.activation(bn_c)
            else:
                h = tf.sigmoid(o) * self.activation(bn_c)

            if self.num_proj is not None:
                w_proj = tf.get_variable('projection/kernel',
                                         [self.num_units, num_proj],
                                         dtype=dtype)
                h = tf.matmul(h, w_proj)
                if self.proj_clip is not None:
                    h = tf.clip_by_value(h, -self.proj_clip, self.proj_clip)

        new_state = (LSTMStateTuple(c, h) if self.state_is_tuple
                     else tf.concat(values=[c, h], axis=1))
        return h, new_state
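For context, a cell like this is driven through tf.nn.bidirectional_dynamic_rnn, whose internal tf.while_loop is where the batch_norm ops from call() end up (the traceback below shows bidirectional_rnn/fw/fw/while in the node names). A minimal usage sketch; the shapes and sizes here are illustrative assumptions, not my actual model:

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, None, 40])  # [batch, time, features]
seq_lengths = tf.placeholder(tf.int32, [None])

fw_cell = BNLSTMCell(num_units=128, is_training=True)
bw_cell = BNLSTMCell(num_units=128, is_training=True)

# dynamic_rnn wraps the cell's call() in a tf.while_loop, so every op
# created inside call(), including the batch-norm moving-average
# updates, lives in that loop's control-flow frame
outputs, states = tf.nn.bidirectional_dynamic_rnn(
    fw_cell, bw_cell, inputs, sequence_length=seq_lengths, dtype=tf.float32)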
My batch_norm function is as follows:
def batch_norm(x, name_scope, is_training):
    with tf.variable_scope(name_scope):
        return tf.layers.batch_normalization(inputs=x, training=is_training,
                                             fused=True)
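For reference, tf.layers.batch_normalization does not run its moving-mean/moving-variance updates on its own; it only adds them to the tf.GraphKeys.UPDATE_OPS collection, which is why the training function below collects that collection. A minimal standalone sketch (the placeholder shape is illustrative, not from my model):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 8])  # illustrative activations
y = tf.layers.batch_normalization(x, training=True)

# the AssignMovingAvg ops land in UPDATE_OPS and must be run explicitly
print(tf.get_collection(tf.GraphKeys.UPDATE_OPS))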
During training, I have a function that retrieves the update_ops:
def _update(self, loss, learning_rate, cluster):
    '''
    create the op to update the model

    args:
        loss: the loss to minimize
        learning_rate: the learning rate
        cluster: the tf cluster

    returns: the update op
    '''

    # create the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # create an optimizer that aggregates gradients
    if int(self.conf['numbatches_to_aggregate']) > 0:
        if 'local' in cluster.as_dict():
            num_workers = 1
        else:
            num_workers = len(cluster.as_dict()['worker'])
        optimizer = tf.train.SyncReplicasOptimizer(
            opt=optimizer,
            replicas_to_aggregate=int(self.conf['numbatches_to_aggregate']),
            total_num_replicas=num_workers)

    tf.summary.scalar('training_loss', loss,
                      collections=['training_summaries'])

    # get the list of trainable variables and remove the variables
    # collected as untrainable
    trainable = tf.trainable_variables()
    untrainable = tf.get_collection('untrainable')
    trainable = [var for var in trainable if var not in untrainable]

    # compute the gradients
    grads_and_vars = optimizer.compute_gradients(loss=loss,
                                                 var_list=trainable)

    with tf.variable_scope('clip'):
        # clip the gradients
        grads_and_vars = [(tf.clip_by_value(grad, -1., 1.), var)
                          for grad, var in grads_and_vars]

    # operation to apply the gradients
    apply_gradients_op = optimizer.apply_gradients(
        grads_and_vars=grads_and_vars,
        name='apply_gradients')

    # all remaining operations in the UPDATE_OPS collection
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    print("update_ops {}".format(update_ops))
    print("################")

    # create an operation that applies the gradients and runs all
    # other update ops
    update_op = tf.group(*([apply_gradients_op] + update_ops),
                         name='update')

    return update_op
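For comparison, the pattern recommended in the tf.layers.batch_normalization documentation gates the train step on the update ops with a control dependency, which has the same effect as the tf.group above. A minimal sketch, where loss and optimizer stand in for the objects built in _update:

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss)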
The error message I receive is:
File "/home/ubuntu/workspace/reproduce/jobs/nabu/nabu/neuralnetworks/trainers/trainer.py", line 769, in train
    outputs['training_summaries']])
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 671, in run
    run_metadata=run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1156, in run
    run_metadata=run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1255, in run
    six.reraise(*original_exc_info)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1240, in run
    return self._sess.run(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1312, in run
    run_metadata=run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1076, in run
    return self._sess.run(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 929, in run
    run_metadata_ptr)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1152, in _run
    feed_dict_tensor, options, run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
    run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Node train/update (defined at /home/ubuntu/workspace/reproduce/jobs/nabu/nabu/neuralnetworks/trainers/trainer.py:578) has inputs from different frames. The input node train/Listener/features/layer1/BLSTM/bidirectional_rnn/fw/fw/while/fw/bnlstm_cell/bnlstm_cell/state/batch_normalization/AssignMovingAvg (defined at /home/ubuntu/workspace/reproduce/jobs/nabu/nabu/neuralnetworks/components/recurrent_batch.py:61) is in frame 'train/Listener/features/layer1/BLSTM/bidirectional_rnn/fw/fw/while/while_context'. The input node train/apply_gradients/Assign (defined at /home/ubuntu/workspace/reproduce/jobs/nabu/nabu/neuralnetworks/trainers/trainer.py:569) is in frame ''.