我正在尝试在 TensorFlow 中实现大间隔余弦损失（Large Margin Cosine Loss，即 CosFace）。我找到了下面这个实现该损失的类：
import math
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Layer
from tensorflow.keras.initializers import Constant
from tensorflow.python.keras.utils import tf_utils
def _resolve_training(layer, training):
    """Resolve the ``training`` argument of a layer call.

    Arguments:
      layer: layer whose ``trainable`` attribute is consulted.
      training: flag received by ``call``; ``None`` falls back to
        ``K.learning_phase()``.

    Returns:
      The result of ``tf_utils.constant_value`` applied to the resolved flag.
    """
    flag = K.learning_phase() if training is None else training
    if isinstance(flag, int):
        # Normalizes plain 0/1 (and Python bools) to a bool.
        flag = bool(flag)
    if not layer.trainable:
        # A non-trainable layer overrides whatever was requested.
        flag = False
    return tf_utils.constant_value(flag)
class CosFace(keras.layers.Layer):
    """
    Implementation of CosFace layer. Reference: https://arxiv.org/abs/1801.09414

    The layer computes the cosine similarity between the L2-normalized
    embedding and the L2-normalized class weights. In training mode the
    margin ``m`` is subtracted from the cosine of the ground-truth class
    before scaling by ``s``; otherwise the scaled cosine is returned as is.

    Arguments:
      num_classes: number of classes to classify
      s: scale factor
      m: margin
      regularizer: weights regularizer
    """

    def __init__(self,
                 num_classes,
                 s=30.0,
                 m=0.35,
                 regularizer=None,
                 name='cosface',
                 **kwargs):
        super().__init__(name=name, **kwargs)
        self._n_classes = num_classes
        self._s = float(s)
        self._m = float(m)
        self._regularizer = regularizer

    def build(self, input_shape):
        """Create the class-weight matrix of shape (embedding_dim, num_classes)."""
        # The layer is called on [embedding, label]; only the embedding
        # width is needed to size the weights.
        embedding_shape, _ = input_shape
        self._w = self.add_weight(shape=(embedding_shape[-1], self._n_classes),
                                  initializer='glorot_uniform',
                                  trainable=True,
                                  regularizer=self._regularizer)

    def call(self, inputs, training=None):
        """
        During training, requires 2 inputs: embedding (after backbone+pool+dense),
        and ground truth labels. The labels should be sparse (and use
        sparse_categorical_crossentropy as loss).

        Returns logits of shape (batch, num_classes): ``s * cos(theta)``, with
        the margin ``m`` subtracted from the ground-truth class in training mode.
        """
        # NOTE: a Python print runs when the call is traced into a graph,
        # not on every batch.
        print('calling CosFace Layer...')
        embedding, label = inputs

        # Labels arrive as a tensor (float by default for a Keras Input), so
        # they must be cast with tf.cast; Python's int() cannot convert a
        # batch tensor. Flattening undoes the (n, 1) expansion done by Keras.
        label = tf.reshape(tf.cast(label, tf.int32), [-1],
                           name='label_shape_correction')

        # Normalize features and weights and compute dot product
        x = tf.nn.l2_normalize(embedding, axis=1, name='normalize_prelogits')
        w = tf.nn.l2_normalize(self._w, axis=0, name='normalize_weights')
        cosine_sim = tf.matmul(x, w, name='cosine_similarity')

        training = _resolve_training(self, training)
        if not training:
            # We don't have labels if we're not in training mode
            return self._s * cosine_sim

        one_hot_labels = tf.one_hot(label,
                                    depth=self._n_classes,
                                    name='one_hot_labels')
        # CosFace applies the margin directly in cosine space:
        # cos(theta) - m for the target class, cos(theta) elsewhere.
        # The result is already a cosine and must not be passed through
        # cos() again. The op name is kept for graph/log compatibility.
        cosine_with_margin = tf.where(tf.cast(one_hot_labels, dtype=tf.bool),
                                      cosine_sim - self._m,
                                      cosine_sim,
                                      name='final_theta')
        print(cosine_with_margin)
        return self._s * cosine_with_margin
我在一个用 MNIST 数据集训练的简单 CNN 上对它进行测试，但训练结果不正常。网络结构如下：
# Two model inputs: the sparse class label (consumed by the CosFace layer)
# and the 28x28 single-channel image.
label = keras.layers.Input((), name="input/labels")
input = keras.layers.Input(shape=[28, 28, 1], name="input/image")

margin = CosFace(num_classes=10, dtype='float32')

# Backbone: three conv -> relu -> 2x2 max-pool stages with shrinking width.
x = input
for n_filters in (64, 32, 16):
    x = keras.layers.Conv2D(n_filters, (3, 3), padding="same")(x)
    x = keras.layers.Activation("relu")(x)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
x = keras.layers.Dropout(0.25)(x)

# Embedding head: flatten, then a 128-unit dense layer with relu.
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(128)(x)
x = keras.layers.Activation("relu", name="dense")(x)
x = keras.layers.Dropout(0.25)(x)

# Margin logits followed by softmax.
x = margin([x, label])
output = keras.layers.Activation("softmax")(x)

model_cos = keras.Model(inputs=[input, label], outputs=output)
model_cos.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

# The labels are passed twice: once as a model input and once as the target.
H_cos = model_cos.fit(
    (X_train, y_train),
    y_train,
    batch_size=64,
    epochs=3,
    verbose=1,
)
这是输出:
Epoch 1/3
calling CosFace Layer...
Tensor("functional_11/cosface/final_theta:0", shape=(None, 10), dtype=float32)
calling CosFace Layer...
Tensor("functional_11/cosface/final_theta:0", shape=(None, 10), dtype=float32)
860/860 [==============================] - 7s 8ms/step - loss: 0.3194 - accuracy: 0.9751
Epoch 2/3
860/860 [==============================] - 6s 7ms/step - loss: 0.0545 - accuracy: 1.0000
Epoch 3/3
860/860 [==============================] - 6s 7ms/step - loss: 0.0368 - accuracy: 1.0000
我不明白发生了什么：首先，真实的准确率不可能是 1；其次，从第二个 epoch 起，CosFace 层就不再被调用了（不再有打印输出）。
您对如何解决此问题有任何想法吗?