I am adding my RNN text classification model below. I use the last RNN state to classify the text. The dataset is small, and I use GloVe vectors for the embeddings.
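Since the question mentions GloVe vectors, here is a hedged sketch (not from the original post) of how an embedding matrix could be built from a GloVe text file; `glove_path` and `word_to_idx` are hypothetical names. Note that in the graph below the embedding dimension equals FLAGS.num_hidden_units, so the GloVe vector dimensionality would have to match it.

import numpy as np

def load_glove_matrix(glove_path, word_to_idx, vocab_size, embed_dim):
    # Start from small random values so out-of-vocabulary words still get a usable row.
    matrix = np.random.uniform(-0.1, 0.1, (vocab_size, embed_dim)).astype(np.float32)
    with open(glove_path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            word, vector = parts[0], parts[1:]
            if word in word_to_idx and len(vector) == embed_dim:
                matrix[word_to_idx[word]] = np.asarray(vector, dtype=np.float32)
    return matrix

The resulting matrix could then seed W_input, e.g. by passing initializer=tf.constant_initializer(load_glove_matrix(...)) to the tf.get_variable call shown below.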
import tensorflow as tf

def rnn_inputs(FLAGS, input_data):
    # Look up embedding rows for the input token ids. reuse=True works because
    # W_input is created in the 'rnn_inputs' variable scope before this is called.
    with tf.variable_scope('rnn_inputs', reuse=True):
        W_input = tf.get_variable("W_input", [FLAGS.en_vocab_size, FLAGS.num_hidden_units])
    embeddings = tf.nn.embedding_lookup(W_input, input_data)
    return embeddings
The model:
self.inputs_X = tf.placeholder(tf.int32, shape=[None, None, FLAGS.num_dim_input], name='inputs_X')
self.targets_y = tf.placeholder(tf.float32, shape=[None, None], name='targets_y')
self.dropout = tf.placeholder(tf.float32, name='dropout')
self.seq_leng = tf.placeholder(tf.int32, shape=[None, ], name='seq_leng')

with tf.name_scope("RNNcell"):
    stacked_cell = rnn_cell(FLAGS, self.dropout)

with tf.name_scope("Inputs"):
    with tf.variable_scope('rnn_inputs'):
        W_input = tf.get_variable("W_input",
                                  [FLAGS.en_vocab_size, FLAGS.num_hidden_units],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
    inputs = rnn_inputs(FLAGS, self.inputs_X)
    # initial_state = stacked_cell.zero_state(FLAGS.batch_size, tf.float32)

with tf.name_scope("DynamicRnn"):
    # flat_inputs = tf.reshape(inputs, [FLAGS.batch_size, -1, FLAGS.num_hidden_units])
    flat_inputs = tf.transpose(
        tf.reshape(inputs, [-1, FLAGS.batch_size, FLAGS.num_hidden_units]),
        perm=[1, 0, 2])
    all_outputs, state = tf.nn.dynamic_rnn(cell=stacked_cell, inputs=flat_inputs,
                                           sequence_length=self.seq_leng, dtype=tf.float32)
    # first element of the final state feeds the classifier
    outputs = state[0]

with tf.name_scope("Logits"):
    with tf.variable_scope('rnn_softmax'):
        W_softmax = tf.get_variable("W_softmax", [FLAGS.num_hidden_units, FLAGS.num_classes])
        b_softmax = tf.get_variable("b_softmax", [FLAGS.num_classes])
    logits = rnn_softmax(FLAGS, outputs)
    probabilities = tf.nn.softmax(logits, name="probabilities")
    # cast the per-example correctness to float and average it into a scalar accuracy
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(self.targets_y, 1), tf.argmax(logits, 1)), tf.float32))

with tf.name_scope("Loss"):
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.targets_y))

with tf.name_scope("Grad"):
    self.lr = tf.Variable(0.0, trainable=False)   # learning rate, assigned externally
    trainable_vars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, trainable_vars),
                                      FLAGS.max_gradient_norm)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_optimizer = optimizer.apply_gradients(zip(grads, trainable_vars))

# sampling path: per-timestep probabilities from the first sequence's outputs
sampling_outputs = all_outputs[0]
sampling_logits = rnn_softmax(FLAGS, sampling_outputs)
self.sampling_probabilities = tf.nn.softmax(sampling_logits)
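For reference, a minimal sketch of how one optimization step against this graph might be run. The names `model`, `batch_X`, `batch_y`, `batch_lens`, and the learning-rate value are assumptions for illustration, not part of the original post.

# Hypothetical single training step; assumes the graph above lives on `model`.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(model.lr, 1e-3))       # self.lr is non-trainable, so set it explicitly
    feed = {
        model.inputs_X: batch_X,              # int32 token ids
        model.targets_y: batch_y,             # float32 target matrix
        model.seq_leng: batch_lens,           # true length of each sequence
        model.dropout: 0.5,                   # dropout value consumed by rnn_cell()
    }
    loss, _ = sess.run([model.loss, model.train_optimizer], feed_dict=feed)
    print("step loss: %.3f" % loss)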
After 165 epochs:
EPOCH 7 SUMMARY 40 STEP
Training loss 0.439
Training accuracy 0.247
----------------------
Validation loss 0.452
Validation accuracy 0.234
----------------------
Saving the model.
EPOCH 8 SUMMARY 45 STEP
Training loss 0.429
Training accuracy 0.281
----------------------
Validation loss 0.462
Validation accuracy 0.203
----------------------
Saving the model.
EPOCH 9 SUMMARY 50 STEP
Training loss 0.428
Training accuracy 0.268
----------------------
Validation loss 0.465
Validation accuracy 0.188
----------------------
Saving the model.
EPOCH 10 SUMMARY 55 STEP
Training loss 0.424
Training accuracy 0.284
----------------------
Validation loss 0.455
Validation accuracy 0.172
----------------------
Saving the model.
EPOCH 11 SUMMARY 60 STEP
Training loss 0.421
Training accuracy 0.305
----------------------
Validation loss 0.461
Validation accuracy 0.156
----------------------
Saving the model.
EPOCH 12 SUMMARY 65 STEP
Training loss 0.418
Training accuracy 0.299
----------------------
Validation loss 0.462
Validation accuracy 0.141
----------------------
Saving the model.
EPOCH 13 SUMMARY 70 STEP
Training loss 0.416
Training accuracy 0.286
----------------------
Validation loss 0.462
Validation accuracy 0.156
----------------------
Saving the model.
EPOCH 14 SUMMARY 75 STEP
Training loss 0.413
Training accuracy 0.323
----------------------
Validation loss 0.468
Validation accuracy 0.141
----------------------
Saving the model.
Answer (score: 0):
If the training loss decreases but the validation loss increases, you are most likely running into an overfitting problem. To put this in context: in general it is not hard for a machine learning algorithm to perform extremely well on the training set (i.e., to reach a very low training loss). If the algorithm simply memorizes the training dataset, it will produce a perfect score there.
The challenge in machine learning, however, is to design a model that performs well on unseen data, i.e., data that was not shown to the algorithm during training. This is what the validation set represents. If a model performs well on unseen data, we say it generalizes well; if it only performs well on the training data, we call it overfitted. A model that does not generalize is essentially useless, because it has learned nothing about the underlying structure of the data and has merely memorized the training set. That memorization is pointless, since the trained model will be applied to new data that most likely never appeared during training. So how can you prevent this? Typical countermeasures are regularization, dropout (your graph already defines a dropout placeholder), reducing the model's capacity, gathering more training data, and early stopping (see the sketch below).
For more background, search online for regularization, overfitting, and related topics.
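As an illustration of one such countermeasure, here is a hedged sketch of early stopping driven by the validation loss; run_training_epoch, run_validation, and the patience value are hypothetical, not from the original post.

best_val_loss = float("inf")
patience, bad_epochs = 5, 0                    # tolerate 5 epochs without improvement
for epoch in range(FLAGS.num_epochs):
    run_training_epoch(sess, model)            # hypothetical helper: one pass over the training set
    val_loss = run_validation(sess, model)     # hypothetical helper: loss on the validation set
    if val_loss < best_val_loss:
        best_val_loss, bad_epochs = val_loss, 0
        saver.save(sess, "best_model.ckpt")    # checkpoint only when validation improves
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            break                              # validation stopped improving; stop before overfitting worsens

Note that in the log above the model is saved every epoch regardless of validation loss; saving only on improvement, as here, would also keep the best checkpoint around.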