I have been trying to use the model_to_estimator functionality from TensorFlow Estimators for a while now. The problem seems to be that Keras allows a binary_crossentropy loss on a single-neuron Dense output, while the converted Estimator apparently does not handle this the same way.

In my case I am feeding the RNN sequence data, and I want to figure out whether a sequence leads to a conversion. The code (also available at https://colab.research.google.com/drive/194Puigi-LdzxZup6LNREk47l9uP0_Dx9):
import numpy as np
import pandas as pd
import tensorflow as tf

np.random.seed(2)

# create something like 50 words out of a vocab of 500
data = np.random.randint(1, 500, size=(10000, 50))

# split
train = data[:8000]
val = data[8000:]

def _input_fn2(arr, batch_size=500, shuffle=False):
    arr_copy = arr.copy()

    def _parse_func(features):
        total = tf.math.reduce_sum(features)
        # label=true if sum is larger than 15000, gives about 1% true
        label = tf.cond(total >= 15000, lambda: np.array([1]), lambda: np.array([0]))
        return (features, label)

    dataset = tf.data.Dataset.from_tensor_slices(arr_copy)
    dataset = dataset.map(_parse_func)
    if shuffle:
        dataset = dataset.shuffle(200)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
    return dataset
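# Note (my addition, not from the original post): each batched element of
# these datasets should be a (features, label) pair of shapes
# ((batch, 50), (batch, 1)), typically int64 -- relevant for the shape
# question discussed at the end.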
from tensorflow.keras.layers import Dense, Input, CuDNNGRU, Embedding
import tensorflow.keras.backend as K

inputs = Input(shape=(50,))
embedding = Embedding(
    output_dim=5,
    input_dim=500,
    input_length=50)(inputs)
lstm = CuDNNGRU(
    units=5,
    return_sequences=False,
)(embedding)
outputs = Dense(1, activation='sigmoid', name='final')(lstm)
model = tf.keras.Model(inputs, outputs)
# Batch-level confusion-matrix metrics in Keras backend style.
def true_positives(y_true, y_pred):
    return K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))

def false_positives(y_true, y_pred):
    return K.sum(K.round(K.clip((1 - y_true) * y_pred, 0, 1)))

def true_negatives(y_true, y_pred):
    return K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1)))

def false_negatives(y_true, y_pred):
    return K.sum(K.round(K.clip(y_true * (1 - y_pred), 0, 1)))

def recall(y_true, y_pred):
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives(y_true, y_pred) / (possible_positives + K.epsilon())

def precision(y_true, y_pred):
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives(y_true, y_pred) / (predicted_positives + K.epsilon())
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='binary_crossentropy',
    metrics=[
        'acc',
        true_positives,
        true_negatives,
        false_positives,
        false_negatives,
        recall,
        precision,
    ]
)
model.summary()

train_ds = _input_fn2(train, shuffle=True)
val_ds = _input_fn2(val)

model.fit(
    train_ds,
    steps_per_epoch=50,
    epochs=100,
    validation_data=val_ds,
    validation_steps=10,
    verbose=2
)
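As an aside, since keeping a single output is motivated by the downstream F1-score computation mentioned further down, an F1 metric in the same batch-level style as the helpers above is straightforward (my sketch, not part of the run shown here):

def f1(y_true, y_pred):
    # Harmonic mean of the batch-level precision and recall defined above.
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * p * r / (p + r + K.epsilon())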
This works fine; the model converges and starts learning.
Epoch 100/100 - 2s - loss: 3.5754e-04 - acc: 1.0000 - true_positives: 3.2000 - true_negatives: 496.7400 - false_positives: 0.0000e+00 - false_negatives: 0.0000e+00 - recall: 0.9400 - precision: 0.9400 - val_loss: 0.1281 - val_acc: 0.9806 - val_true_positives: 0.0000e+00 - val_true_negatives: 490.3000 - val_false_positives: 4.5000 - val_false_negatives: 5.2000 - val_recall: 0.0000e+00 - val_precision: 0.0000e+00
You can see that it mostly guesses negative, which is due to the imbalance of the dataset and is probably the right thing to do.
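For reference, the roughly 1% positive rate mentioned in the code comment above can be verified directly on the raw data (my quick check, not part of the original run):

print((data.sum(axis=1) >= 15000).mean())  # prints roughly 0.01 for this data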
Now I convert it to an Estimator model like this:
from tensorflow.keras.estimator import model_to_estimator
from tensorflow.estimator import train_and_evaluate, RunConfig
from tensorflow.estimator import TrainSpec, EvalSpec
from tensorflow import metrics
from tensorflow.contrib.estimator import add_metrics

run_config = RunConfig(
    save_checkpoints_secs=5,
    keep_checkpoint_max=10
)

# streaming (tf.metrics) counterparts of the Keras metrics above
def eval_metrics(features, labels, predictions):
    return {
        'precision_streaming': metrics.precision(labels=labels, predictions=predictions['final']),
        'recall_streaming': metrics.recall(labels=labels, predictions=predictions['final']),
        'true_positives_streaming': metrics.true_positives(labels=labels, predictions=predictions['final']),
        'true_negatives_streaming': metrics.true_negatives(labels=labels, predictions=predictions['final']),
        'false_positives_streaming': metrics.false_positives(labels=labels, predictions=predictions['final']),
        'false_negatives_streaming': metrics.false_negatives(labels=labels, predictions=predictions['final'])
    }

estimator = model_to_estimator(keras_model=model, config=run_config)
estimator = add_metrics(estimator, eval_metrics)

train_spec = TrainSpec(
    input_fn=lambda: _input_fn2(train, shuffle=True), max_steps=2000
)
eval_spec = EvalSpec(input_fn=lambda: _input_fn2(val), steps=4)

score = train_and_evaluate(estimator, train_spec, eval_spec)
print(score)
After resetting the model and training the Estimator-based version, the model does not converge and now seems to only ever predict true:
({'binary_accuracy': 0.9865, 'false_negatives_streaming': 0.0, 'false_positives_streaming': 1979.0, 'precision_streaming': 0.0105, 'recall_streaming': 1.0, 'true_negatives_streaming': 0.0, 'true_positives_streaming': 21.0, 'global_step': 2000}, [])
Now, I did manage to get this working by using a Dense(2) final layer, one-hot encoding the labels and switching the loss function to sparse_categorical_crossentropy, but I would really like to keep the single output class, since it makes my downstream F1-score and other calculations much easier.
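For reference, a minimal sketch of that workaround, reusing the layers from above (note that sparse_categorical_crossentropy consumes integer class labels directly; one-hot encoded labels would instead pair with categorical_crossentropy):

# Two-unit workaround sketch: softmax over two classes instead of a
# single sigmoid unit; the integer 0/1 labels are used as-is.
outputs2 = Dense(2, activation='softmax', name='final')(lstm)
model2 = tf.keras.Model(inputs, outputs2)
model2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['acc']
)
estimator2 = model_to_estimator(keras_model=model2, config=run_config)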
My educated guess is that the Estimator is unable to map the loss onto the single-unit Dense output layer, which Keras somehow manages to do.
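One way to make that guess concrete is to compare the label shape the input_fn emits with the model's output shape (a sketch assuming TF 1.x graph mode, as in the code above):

# Compare what the input_fn emits with what the Dense(1) head produces.
batch = _input_fn2(val).make_one_shot_iterator().get_next()
with tf.Session() as sess:
    features, labels = sess.run(batch)
print(features.shape)      # (500, 50)
print(labels.shape)        # (500, 1)
print(model.output_shape)  # (None, 1)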
Any help would be greatly appreciated.
Best, 威特西