我尝试用 TensorFlow 实现一个简单的点击率(CTR)预测模型。训练损失在下降,但准确率始终保持不变(实际上,它恰好等于正样本或负样本所占的比例,看起来模型把所有样本都预测成了 0,或者都预测成了 1)。我该如何解决这个问题?
def parse_exmp(serial_exmp):
    """Parse one serialized example into a ``(features, labels)`` pair.

    The parsing spec is built from a float32 numeric column named "click"
    followed by the module-level ``my_feature_columns``.

    Args:
        serial_exmp: A serialized example, as accepted by
            ``tf.parse_single_example``.

    Returns:
        A tuple ``(features, labels)``. ``features`` is the parsed dict with
        the "click" entry removed; ``labels`` is ``{'ctr': click}`` with the
        click value converted to float.
    """
    label_column = fc.numeric_column("click", dtype=tf.float32)
    all_columns = [label_column]
    all_columns.extend(my_feature_columns)
    parse_spec = tf.feature_column.make_parse_example_spec(all_columns)
    parsed = tf.parse_single_example(serial_exmp, features=parse_spec)
    # Split the label off so that only model inputs remain in the dict.
    label = parsed.pop('click')
    labels = {'ctr': tf.to_float(label)}
    return parsed, labels
def train_input_fn(filenames, batch_size, shuffle_buffer_size):
    """Build the training input pipeline from TFRecord files.

    Args:
        filenames: TFRecord file name(s) passed to
            ``tf.data.TFRecordDataset``.
        batch_size: Number of examples per batch.
        shuffle_buffer_size: Size of the shuffle buffer. ``None`` or a
            non-positive value disables shuffling.

    Returns:
        A dataset of batched ``(features, labels)`` pairs, as produced by
        ``parse_exmp``, that repeats indefinitely.
    """
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(parse_exmp, num_parallel_calls=5)
    # Shuffle individual examples before repeat/batch. Without this the
    # records are consumed in file order, so batches can end up dominated by
    # a single label when the files are grouped by class.
    if shuffle_buffer_size is not None and shuffle_buffer_size > 0:
        dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.repeat().batch(batch_size).prefetch(1000)
    return dataset
def eval_input_fn(filename, batch_size):
    """Build the evaluation input pipeline from TFRecord files.

    Args:
        filename: TFRecord file name(s) passed to
            ``tf.data.TFRecordDataset``.
        batch_size: Number of examples per batch.

    Returns:
        A dataset of batched ``(features, labels)`` pairs, as produced by
        ``parse_exmp``.
    """
    records = tf.data.TFRecordDataset(filename)
    parsed = records.map(parse_exmp, num_parallel_calls=5)
    # Evaluation only batches: there is no shuffle and no repeat here.
    return parsed.batch(batch_size)
def build_model(features, mode, params):
    """Build a feed-forward network and return its single-unit logits.

    Args:
        features: Feature dict handed to ``fc.input_layer``.
        mode: A ``tf.estimator.ModeKeys`` value; dropout is only active when
            it equals ``TRAIN``.
        params: Mapping that provides 'feature_columns' and 'hidden_units',
            and optionally 'dropout_rate'.

    Returns:
        The output of a final dense layer with one unit and no activation.
    """
    # NOTE(review): the pasted code had lost its indentation; dropout is
    # assumed to follow every hidden layer (inside the loop) - confirm.
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    hidden = fc.input_layer(features, params['feature_columns'])
    # One ReLU dense layer per entry of params['hidden_units'].
    for layer_width in params['hidden_units']:
        hidden = tf.layers.dense(
            hidden,
            units=layer_width,
            kernel_regularizer=None,
            bias_regularizer=None,
            activation=tf.nn.relu,
        )
        if 'dropout_rate' in params and params['dropout_rate'] > 0.0:
            hidden = tf.layers.dropout(
                hidden, params['dropout_rate'], training=is_training
            )

    # Linear projection to a single logit.
    return tf.layers.dense(hidden, 1, activation=None)
def my_model(features, labels, mode, params):
    """Estimator ``model_fn`` for a single-output click-through-rate model.

    Args:
        features: Feature dict forwarded to ``build_model``.
        labels: Dict holding the click label under the key 'ctr'. Not read
            in PREDICT mode.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Hyper-parameters; 'learning_rate' is read here, the rest is
            forwarded to ``build_model``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` matching ``mode``.

    Raises:
        ValueError: If ``mode`` is not PREDICT, EVAL or TRAIN.
    """
    ctr_logits = build_model(features, mode, params)
    ctr_predictions = tf.sigmoid(ctr_logits, name="CTR")

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(ctr_predictions)
        }
        return tf.estimator.EstimatorSpec(
            mode, predictions=ctr_predictions, export_outputs=export_outputs
        )

    ctr_labels = labels['ctr']
    # The loss is summed (not averaged) over the batch, so its magnitude
    # scales with the batch size.
    loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=ctr_labels, logits=ctr_logits
        ),
        name="loss",
    )

    # Metrics and their summaries are created before either branch returns,
    # so they are part of the TRAIN graph as well as the EVAL graph.
    predicted_classes = tf.cast(
        tf.greater_equal(ctr_predictions, 0.5), tf.float32
    )
    ctr_accuracy = tf.metrics.accuracy(
        labels=ctr_labels, predictions=predicted_classes
    )
    ctr_auc = tf.metrics.auc(ctr_labels, ctr_predictions)
    metrics = {'ctr_accuracy': ctr_accuracy, 'ctr_auc': ctr_auc}
    # Index 1 of each metric tuple is its update op.
    tf.summary.scalar('ctr_accuracy', ctr_accuracy[1])
    tf.summary.scalar('ctr_auc', ctr_auc[1])

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=metrics
        )

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdagradOptimizer(
            learning_rate=params['learning_rate']
        )
        train_op = optimizer.minimize(
            loss, global_step=tf.train.get_global_step()
        )
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op
        )

    # Fail loudly instead of silently returning None for an unknown mode.
    raise ValueError("Unsupported mode: {}".format(mode))
# Build the feature columns, the estimator, and the train/eval specs.
create_feature_columns()

classifier = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'feature_columns': my_feature_columns,
        # hidden_units is a comma-separated string; convert each entry to an
        # int layer width and skip blank entries (e.g. from an empty string).
        'hidden_units': [
            int(width) for width in hidden_units.split(',') if width.strip()
        ],
        'learning_rate': learning_rate,
        'dropout_rate': dropout_rate,
    },
    config=tf.estimator.RunConfig(
        model_dir=model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
    ),
)

train_spec = tf.estimator.TrainSpec(
    input_fn=lambda: train_input_fn(
        train_files, batch_size, shuffle_buffer_size
    ),
    max_steps=train_steps,
)


def input_fn_for_eval():
    """Return the evaluation dataset for ``eval_files``."""
    return eval_input_fn(eval_files, batch_size)


# steps=None is passed through unchanged from the original configuration.
eval_spec = tf.estimator.EvalSpec(
    input_fn=input_fn_for_eval, throttle_secs=60, steps=None
)
这是日志:
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 60 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into ./model_dir/model.ckpt.
INFO:tensorflow:loss = 8575.396, step = 1
INFO:tensorflow:Saving checkpoints for 45 into ./model_dir/model.ckpt.
INFO:tensorflow:Loss for final step: 92.291855.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-06-10-07:05:03
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_dir/model.ckpt-45
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-06-10-07:05:17
INFO:tensorflow:Saving dict for global step 45: ctr_accuracy = 0.436, ctr_auc = 0.5, global_step = 45, loss = 6467.209
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_dir/model.ckpt-45
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 46 into ./model_dir/model.ckpt.
INFO:tensorflow:loss = 1430.6135, step = 46
INFO:tensorflow:Saving checkpoints for 87 into ./model_dir/model.ckpt.
INFO:tensorflow:Loss for final step: 469.1917.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-06-10-07:06:44
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_dir/model.ckpt-87
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-06-10-07:06:58
INFO:tensorflow:Saving dict for global step 87: ctr_accuracy = 0.436, ctr_auc = 0.5, global_step = 87, loss = 3759.593
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_dir/model.ckpt-87
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 88 into ./model_dir/model.ckpt.
INFO:tensorflow:loss = 938.0892, step = 88
INFO:tensorflow:Saving checkpoints for 131 into ./model_dir/model.ckpt.
INFO:tensorflow:Loss for final step: 3.707542e-12.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-06-10-07:08:26
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model_dir/model.ckpt-131
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-06-10-07:08:39
INFO:tensorflow:Saving dict for global step 131: ctr_accuracy = 0.436, ctr_auc = 0.5, global_step = 131, loss = 2633.0947