我是tensorflow的新手。我有一个问题,从检查点加载模型并继续对其进行训练后,我得到了错误,除了错误之外,另一件事是验证准确性为零,就好像它从一开始就是训练模型一样。
这是错误
InvalidArgumentError:您必须使用dtype int32和形状[?,?]
输入占位符张量'word_ids'的值[[[节点:word_ids = Placeholderdtype = DT_INT32,shape = [?,?],_device =“ / job:localhost / replica:0 / task:0 / gpu:0”]]
[[[节点:mask_1 / _1219 = _Recvclient_terminated = false,recv_device =“ / job:localhost /副本0 / task:0 / cpu:0”,send_device =“ / job:localhost /副本0 / task: 0 / gpu:0“,send_device_incarnation = 1,tensor_name =” edge_24_mask_1“,tensor_type = DT_BOOL,_device =” / job:localhost /副本:0 / task:0 / cpu:0“]]
在加载模型并对其进行评估后出现提示
epoch 35 method =宏标签| f1 = 0.00 时代35 method = micro-labels | f1 = 0.00
这是型号代码
def _reverse(input_, seq_lengths, seq_dim, batch_dim): # reverses sequences with right-padding correctly
return array_ops.reverse_sequence(
input=input_, seq_lengths=seq_lengths,
seq_dim=seq_dim, batch_dim=batch_dim)
def bilstms_interleaved(inputs, num_layers, size, keep_prob, lengths):
outputs = inputs
print('interleaved')
for layer in range(num_layers):
direction = 'backw.' if layer % 2 else 'forw.'
print('Layer {}: Creating {} LSTM'.format(layer, direction)) # backwards if layer odd
with tf.variable_scope('{}_lstm_{}'.format(direction, layer)):
# cell
cell = HighwayWrapper(DropoutWrapper(LSTMCell(size,initializer=tf.orthogonal_initializer()),
variational_recurrent=True,
dtype=tf.float32,
state_keep_prob=keep_prob))
# calc either bw or fw - interleaving is done at graph construction (not runtime)
if direction == 'backw.':
outputs_reverse = _reverse(outputs, seq_lengths=lengths, seq_dim=1, batch_dim=0)
tmp, _ = tf.nn.dynamic_rnn(cell=cell,
inputs=outputs_reverse,
sequence_length=lengths,
dtype=tf.float32)
outputs = _reverse(tmp, seq_lengths=lengths, seq_dim=1, batch_dim=0)
else:
outputs, _ = tf.nn.dynamic_rnn(cell=cell,
inputs=outputs,
sequence_length=lengths,
dtype=tf.float32)
return outputs
class Model():
def __init__(self, config, embeddings, num_labels, g):
# embedding
#with tf.device('/cpu:0'):
# stacked bilstm
with tf.device('/gpu:0'):
self.word_ids = tf.placeholder(tf.int32, [None, None], name='word_ids')
embedded = tf.nn.embedding_lookup(embeddings, self.word_ids, name='embedded')
self.predicate_ids = tf.placeholder(tf.float32, [None, None], name='predicate_ids')
self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
self.lengths = tf.placeholder(tf.int32, [None], name='lengths')
inputs = tf.concat([embedded, tf.expand_dims(self.predicate_ids, -1)], axis=2, name='lstm_inputs')
final_outputs = bilstms_interleaved(inputs,
config.num_layers,
config.cell_size,
self.keep_prob,
self.lengths)
# projection
shape0 = tf.shape(final_outputs)[0] * tf.shape(final_outputs)[1] # both batch_size and seq_len are dynamic
final_outputs_2d = tf.reshape(final_outputs, [shape0, config.cell_size], name='final_outputs_2d')
wy = tf.get_variable('Wy', [config.cell_size, num_labels])
by = tf.get_variable('by', [num_labels])
logits = tf.nn.xw_plus_b(final_outputs_2d, wy, by, name='logits') # need [shape0, num_labels]
# loss
self.label_ids = tf.placeholder(tf.int32, [None, None], name='label_ids') # [batch_size, max_seq_len]
label_ids_flat = tf.reshape(self.label_ids, [-1]) # need [shape0]
mask = tf.greater(label_ids_flat, 0, 'mask')
self.nonzero_label_ids_flat = tf.boolean_mask(label_ids_flat, mask,
name='nonzero_label_ids_flat') # removes elements
nonzero_logits = tf.boolean_mask(logits, mask, name='nonzero_logits')
nonzero_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=nonzero_logits,
labels=self.nonzero_label_ids_flat,
name='nonzero_losses')
self.nonzero_mean_loss = tf.reduce_mean(nonzero_losses, name='nonzero_mean_loss')
# update
optimizer = tf.train.AdadeltaOptimizer(learning_rate=config.learning_rate, rho=0.95,epsilon=config.epsilon)
gradients, variables = zip(*optimizer.compute_gradients(self.nonzero_mean_loss))
gradients, _ = tf.clip_by_global_norm(gradients, config.max_grad_norm)
self.update = optimizer.apply_gradients(zip(gradients, variables), name='update')
# predictions
self.nonzero_predicted_label_ids = tf.cast(tf.argmax(tf.nn.softmax(nonzero_logits), axis=1), tf.int32,
name='nonzero_predicted_label_ids')
# tensorboard
tf.summary.scalar('nonzero_accuracy', tf.reduce_mean(tf.cast(tf.equal(self.nonzero_predicted_label_ids,
self.nonzero_label_ids_flat),
tf.float32)))
tf.summary.scalar('nonzero_mean_xe', self.nonzero_mean_loss)
self.scalar_summaries = tf.summary.merge_all()
p = Path("/content/drive/My Drive/DLSRL/Tensorboard log")
self.train_writer = tf.summary.FileWriter(str(p), g)
# confusion matrix
nonzero_cm = tf.confusion_matrix(self.nonzero_label_ids_flat, self.nonzero_predicted_label_ids)
size = tf.shape(nonzero_cm)[0]
self.cm_summary = tf.summary.image('nonzero_cm', tf.reshape(tf.cast(nonzero_cm, tf.float32),
[1, size, size, 1])) # needs 4d
这是加载模型并继续对其进行训练的代码。
time_of_init = datetime.datetime.now().strftime('%m-%d-%H-%M')
runs_dir = Path("/content/drive/My Drive/DLSRL/Model/newtraining")
if not runs_dir.is_dir():
runs_dir.mkdir()
json.dump(d, (runs_dir / 'config_{}.json'.format(time_of_init)).open('w'), ensure_ascii=False)
print('Saved configs to {}'.format(runs_dir))
# data
train_data, dev_data, word_dict, label_dict, embeddings = get_data(
config, TRAIN_DATA_PATH, DEV_DATA_PATH)
# train loop
epoch_step = 0
global_step = 35
epoch_loss_sum = 0.0
global_start = time.time()
saver = tf.train.import_meta_graph('./drive/My Drive/DLSRL/Model/04-03-16-38_epoch_35.ckpt.meta')
g = tf.get_default_graph()
with g.as_default():
model = Model(config, embeddings, label_dict.size(), g)
sess = tf.Session(graph=g, config=tf.ConfigProto(allow_soft_placement=True,
log_device_placement=False))
saver.restore(sess, tf.train.latest_checkpoint('./drive/My Drive/DLSRL/Model/'))
sess.run(tf.global_variables_initializer())
#init_vars = tf.initialize_variables([var1, var2])
#sess.run(init_vars)
ckpt_saver = tf.train.Saver(max_to_keep=config.max_epochs)
for epoch in range(35,config.max_epochs):
# save chckpoint from which to load model
path = runs_dir / "{}_epoch_{}.ckpt".format(time_of_init, epoch)
ckpt_saver.save(sess, str(path))
print('Saved checkpoint.')
evaluate(dev_data, model, sess, epoch, global_step)
x1, x2, y = shuffle_stack_pad(train_data, config.train_batch_size)
epoch_start = time.time()
for x1_b, x2_b, y_b in get_batches(x1, x2, y, config.train_batch_size):
feed_dict = make_feed_dict(x1_b, x2_b, y_b, model, config.keep_prob)
if epoch_step % LOSS_INTERVAL == 0:
# tensorboard
run_options = tf.RunOptions(trace_level=tf.RunOptions.NO_TRACE)
scalar_summaries = sess.run(model.scalar_summaries,
feed_dict=feed_dict,
options=run_options)
model.train_writer.add_summary(scalar_summaries, global_step)
# print info
print("step {:>6} epoch {:>3}: loss={:1.3f}, epoch sec={:3.0f}, total hrs={:.1f}".format(
epoch_step,
epoch,
epoch_loss_sum / max(epoch_step, 1),
(time.time() - epoch_start),
(time.time() - global_start) / 3600))
loss, _ = sess.run([model.nonzero_mean_loss, model.update], feed_dict=feed_dict)
epoch_loss_sum+= loss
epoch_step += 1
global_step += 1
epoch_step = 0
epoch_loss_sum = 0.0
谢谢。