I am trying to apply knowledge distillation to YOLO object detection. For the loss computation I use a Lambda loss layer with inputs (*model_body.output, *y_true, *l_true) and a dummy loss whose target is all zeros. y_true are the ground-truth labels from the dataset; l_true are the labels predicted by the Keras teacher model. The loss function is the standard YOLO loss, with an alpha term that balances the y_true loss against the l_true loss.
The problem: when training without l_true, the loss is stable and the accuracy is normal; when training with l_true, the loss becomes unstable and the accuracy drops by half. Even when I set alpha to 0, so the l_true term contributes nothing and only the y_true term is used, the results are still bad. Why does the accuracy drop even though the loss should be the same in that case? I tried freezing the teacher layers, but the result did not change.
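For context, the model is compiled against the all-zeros dummy target roughly like this (a minimal sketch of the usual keras-yolo3-style setup; the optimizer and learning rate shown here are placeholders, not necessarily the values I used):

from keras.optimizers import Adam

# The Lambda layer named 'yolo_loss' already outputs the scalar YOLO loss,
# so the Keras-level loss simply passes that output through; the
# np.zeros(batch_size) target yielded by the generator is ignored.
model.compile(optimizer=Adam(lr=1e-3),
              loss={'yolo_loss': lambda y_true, y_pred: y_pred})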
Training with y_true
Epoch 20/30
104/104 [==============================] - 348s 3s/step - loss: 21.1927 - val_loss: 23.4338
Epoch 21/30
104/104 [==============================] - 343s 3s/step - loss: 20.5408 - val_loss: 22.5973
Epoch 22/30
104/104 [==============================] - 346s 3s/step - loss: 20.7698 - val_loss: 21.9262
Epoch 23/30
104/104 [==============================] - 347s 3s/step - loss: 20.0793 - val_loss: 22.9803
Epoch 24/30
104/104 [==============================] - 344s 3s/step - loss: 19.7196 - val_loss: 24.5589
Epoch 25/30
104/104 [==============================] - 347s 3s/step - loss: 19.7269 - val_loss: 23.0470
Epoch 26/30
104/104 [==============================] - 345s 3s/step - loss: 19.2466 - val_loss: 22.0419
Epoch 27/30
104/104 [==============================] - 347s 3s/step - loss: 19.2453 - val_loss: 20.5318
Epoch 28/30
104/104 [==============================] - 345s 3s/step - loss: 18.7954 - val_loss: 22.2814
Epoch 29/30
104/104 [==============================] - 349s 3s/step - loss: 18.5808 - val_loss: 20.9849
Epoch 30/30
104/104 [==============================] - 343s 3s/step - loss: 18.6739 - val_loss: 21.3272
mAP: 0.1424
Training with y_true and l_true
Epoch 21/30
104/104 [==============================] - 499s 5s/step - loss: 14.8315 - val_loss: 42.2952
Epoch 22/30
104/104 [==============================] - 500s 5s/step - loss: 14.2078 - val_loss: 36.7588
Epoch 23/30
104/104 [==============================] - 501s 5s/step - loss: 13.4158 - val_loss: 37.4477
Epoch 24/30
104/104 [==============================] - 498s 5s/step - loss: 13.4372 - val_loss: 35.8958
Epoch 25/30
104/104 [==============================] - 501s 5s/step - loss: 12.5211 - val_loss: 42.3112
Epoch 26/30
104/104 [==============================] - 500s 5s/step - loss: 12.3828 - val_loss: 40.8860
Epoch 27/30
104/104 [==============================] - 499s 5s/step - loss: 12.6398 - val_loss: 38.1945
Epoch 28/30
104/104 [==============================] - 497s 5s/step - loss: 17.1009 - val_loss: 37.3391
Epoch 29/30
104/104 [==============================] - 502s 5s/step - loss: 17.7571 - val_loss: 38.2577
Epoch 30/30
104/104 [==============================] - 498s 5s/step - loss: 14.7625 - val_loss: 36.2055
mAP: 0.0198
Generator without l_true
def data_generator_double(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''data generator for fit_generator'''
    n = len(annotation_lines)
    i = 0
    while True:
        image_data = []
        box_data = []
        for b in range(batch_size):
            if i == 0:
                np.random.shuffle(annotation_lines)
            image, box = get_random_data(annotation_lines[i], input_shape, random=True)
            image_data.append(image)
            box_data.append(box)
            i = (i + 1) % n
        image_data = np.array(image_data)
        box_data = np.array(box_data)
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true, *y_true], np.zeros(batch_size)
Loss function without l_true
y_true = [Input(shape=(h//{0: 32, 1: 16, 2: 8}[l], w//{0: 32, 1: 16, 2: 8}[l],
                       num_anchors//3, num_classes + 5)) for l in range(3)]

model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
                    arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})(
                    [*model_body.output, *y_true])
model = Model([model_body.input, *y_true], model_loss)

def test_yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    num_layers = len(anchors)//3  # default setting
    yolo_outputs = args[:num_layers]  # raw YOLO outputs
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    for l in range(num_layers):
        xy_loss, wh_loss, confidence_loss, class_loss = basic_yolo_loss(
            yolo_outputs[l], y_true[l], anchors[anchor_mask[l]], num_classes,
            ignore_thresh, input_shape, grid_shapes[l], m, mf)
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss],
                            message=' loss: ')
    return loss
Generator with l_true
teacher = teacher_body(image_input, num_anchors//3, num_classes)
teacher.load_weights(teacher_path)
# reshape the three raw teacher outputs to (grid, grid, anchors_per_scale, num_classes + 5); 25 = 20 classes + 5
yolo3 = Reshape((13, 13, 3, 25))(teacher.layers[-3].output)
yolo2 = Reshape((26, 26, 3, 25))(teacher.layers[-2].output)
yolo1 = Reshape((52, 52, 3, 25))(teacher.layers[-1].output)
teacher = Model(inputs=teacher.input, outputs=[yolo3, yolo2, yolo1])
for layer in teacher.layers:
    layer.trainable = False
teacher._make_predict_function()

def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, teacher):
    '''data generator for fit_generator'''
    n = len(annotation_lines)
    i = 0
    while True:
        image_data = []
        box_data = []
        for b in range(batch_size):
            if i == 0:
                np.random.shuffle(annotation_lines)
            image, box = get_random_data(annotation_lines[i], input_shape, random=True)
            image_data.append(image)
            box_data.append(box)
            i = (i + 1) % n
        image_data = np.array(image_data)
        box_data = np.array(box_data)
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        m_true = teacher.predict(image_data)
        h, w = input_shape
        num_anchors = len(anchors)
        # 416 input, 3 anchors per scale, 20 classes + 5
        l_true = [np.zeros(shape=(batch_size, 416//{0: 32, 1: 16, 2: 8}[l], 416//{0: 32, 1: 16, 2: 8}[l],
                                  9//3, 20 + 5)) for l in range(3)]
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if len(m_true) == 3 else [[3, 4, 5], [1, 2, 3]]
        for l in range(len(m_true)):
            '''
            # earlier attempt: decode the teacher output with yolo_head inside a TF session
            pred_output = tf.Variable(m_true[l])
            anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if len(m_true)==3 else [[3,4,5], [1,2,3]]
            pred_xy, pred_wh, pred_conf, pred_class = yolo_head(pred_output, anchors[anchor_mask[l]],
                                                                num_classes, input_shape, calc_loss=False)
            pred_model = K.concatenate([pred_xy, pred_wh, pred_conf, pred_class])
            with tf.Session() as sess:
                init = tf.global_variables_initializer()
                sess.run(init)
                pred_model = pred_model.eval()
            '''
            # decode the raw teacher output the same way yolo_head does, but in NumPy
            anchors_tensor = np.reshape(anchors[anchor_mask[l]], [1, 1, 1, len(anchors[anchor_mask[l]]), 2])
            grid_shape = m_true[l].shape[1:3]  # height, width
            grid_y = np.tile(np.reshape(np.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                             [1, grid_shape[1], 1, 1])
            grid_x = np.tile(np.reshape(np.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                             [grid_shape[0], 1, 1, 1])
            grid = np.concatenate([grid_x, grid_y], axis=-1)
            m_true[l][..., :2] = (sigmoid(m_true[l][..., :2]) + grid) / np.array(grid_shape[::-1])
            m_true[l][..., 2:4] = np.exp(m_true[l][..., 2:4]) * anchors_tensor / np.array(input_shape[::-1])
            m_true[l][..., 4] = sigmoid(m_true[l][..., 4])
            m_true[l][..., 5:] = sigmoid(m_true[l][..., 5:])
            # keep the teacher prediction only at cells that contain a ground-truth object
            box = np.where(y_true[l][..., 4] > 0.5)
            box = np.transpose(box)
            for i in range(len(box)):
                l_true[l][tuple(box[i])] = m_true[l][tuple(box[i])]
        yield [image_data, *y_true, *l_true], np.zeros(batch_size)
Loss function with l_true
y_true = [Input(shape=(h//{0: 32, 1: 16, 2: 8}[l], w//{0: 32, 1: 16, 2: 8}[l],
                       num_anchors//3, num_classes + 5)) for l in range(3)]
l_true = [Input(shape=(h//{0: 32, 1: 16, 2: 8}[l], w//{0: 32, 1: 16, 2: 8}[l],
                       num_anchors//3, num_classes + 5)) for l in range(3)]

model_loss = Lambda(yolo_distill_loss, output_shape=(1,), name='yolo_loss',
                    arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
                    [*model_body.output, *y_true, *l_true])
model = Model([model_body.input, *y_true, *l_true], model_loss)

def yolo_distill_loss(args, anchors, num_classes, ignore_thresh=.5, alpha=0, print_loss=False):
    num_layers = len(anchors)//3  # default setting
    yolo_outputs = args[:num_layers]  # raw YOLO outputs
    y_true = args[num_layers:num_layers*2]
    l_true = args[num_layers*2:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    for l in range(num_layers):
        # teacher term, weighted by alpha
        xy_loss, wh_loss, confidence_loss, class_loss, ignore_mask = basic_yolo_loss(
            yolo_outputs[l], l_true[l], anchors[anchor_mask[l]], num_classes,
            ignore_thresh, input_shape, grid_shapes[l], m, mf)
        loss += alpha * (xy_loss + wh_loss + confidence_loss + class_loss)
        # loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message=' loss teacher: ')
        # student (ground-truth) term, weighted by 1 - alpha
        xy_loss, wh_loss, confidence_loss, class_loss, ignore_mask = basic_yolo_loss(
            yolo_outputs[l], y_true[l], anchors[anchor_mask[l]], num_classes,
            ignore_thresh, input_shape, grid_shapes[l], m, mf)
        loss += (1 - alpha) * (xy_loss + wh_loss + confidence_loss + class_loss)
        # loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message=' loss student: ')
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                            message=' loss: ')
    return loss
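For completeness, the distillation model is then trained with fit_generator roughly as follows (a minimal sketch; train_lines, val_lines and batch_size are placeholders rather than my exact script):

# Hypothetical training call; the teacher-driven generator feeds
# [image_data, *y_true, *l_true] plus the zero dummy target.
model.fit_generator(
    data_generator(train_lines, batch_size, input_shape, anchors, num_classes, teacher),
    steps_per_epoch=max(1, len(train_lines) // batch_size),
    validation_data=data_generator(val_lines, batch_size, input_shape, anchors, num_classes, teacher),
    validation_steps=max(1, len(val_lines) // batch_size),
    epochs=30)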