我是神经网络的新手,我正在研究图像分割问题(单类和多类)。我使用经典的U-net模型,并通过MobileNetV2的瓶颈模块进行了改进,效果很好。现在,我尝试用与训练U-net改进版完全相同的训练数据和代码(只是把模型构造函数换成新的),使用从此article提取的Fast-SCNN来分割图像——但Fast-SCNN既不能拟合训练数据,也不能拟合单张图像。与该文章代码最大的不同是,我修复了一些激活层问题,并将输入图像的分辨率更改为512x512。
我正在使用tensorflow 1.13.1和Python 3.7
我尝试过:
没有任何效果。我认为瓶颈可能存在问题,并在U-net中对其进行了测试,但它们确实有效。一些其他信息:
所以问题是为什么Fast-SCNN无法学习?
当使用分类交叉熵和RMSprop训练Fast-SCNN时(换用其他损失函数和优化器的结果也类似),我得到的是:
Epoch 1/100
1/1 [==============================] - 9s 9s/sample - loss: 4536.6006 - acc: 0.0000e+00 - dice_coeff: 0.0030 - jaccard_coef: 0.0015 - my_dice: 0.0030
Epoch 2/100
1/1 [==============================] - 1s 836ms/sample - loss: 4437.3359 - acc: 0.0000e+00 - dice_coeff: 0.0041 - jaccard_coef: 0.0021 - my_dice: 0.0041
Epoch 3/100
1/1 [==============================] - 1s 809ms/sample - loss: 4381.8257 - acc: 0.0000e+00 - dice_coeff: 0.0048 - jaccard_coef: 0.0024 - my_dice: 0.0048
Epoch 4/100
1/1 [==============================] - 0s 495ms/sample - loss: 4399.3774 - acc: 0.0000e+00 - dice_coeff: 0.0046 - jaccard_coef: 0.0023 - my_dice: 0.0046
Epoch 5/100
1/1 [==============================] - 1s 819ms/sample - loss: 4350.6323 - acc: 0.0000e+00 - dice_coeff: 0.0051 - jaccard_coef: 0.0026 - my_dice: 0.0051
Epoch 6/100
1/1 [==============================] - 0s 491ms/sample - loss: 4367.3389 - acc: 0.0000e+00 - dice_coeff: 0.0050 - jaccard_coef: 0.0025 - my_dice: 0.0049
Epoch 7/100
1/1 [==============================] - 0s 493ms/sample - loss: 4374.4238 - acc: 0.0000e+00 - dice_coeff: 0.0049 - jaccard_coef: 0.0024 - my_dice: 0.0049
Epoch 8/100
1/1 [==============================] - 0s 486ms/sample - loss: 4401.6699 - acc: 0.0000e+00 - dice_coeff: 0.0046 - jaccard_coef: 0.0023 - my_dice: 0.0046
Epoch 9/100
1/1 [==============================] - 1s 826ms/sample - loss: 4349.6147 - acc: 0.0000e+00 - dice_coeff: 0.0052 - jaccard_coef: 0.0026 - my_dice: 0.0051
Epoch 10/100
1/1 [==============================] - 0s 492ms/sample - loss: 4440.0439 - acc: 0.0000e+00 - dice_coeff: 0.0042 - jaccard_coef: 0.0021 - my_dice: 0.0042
我的模型:
import tensorflow as tf
from losses import *
"""
# Model Architecture
#### Custom function for conv2d: conv_block
"""
def conv_block(inputs, conv_type, kernel, kernel_size, strides, padding='same', relu=True):
    """Convolution -> BatchNorm -> optional ReLU.

    conv_type 'ds' selects a depthwise-separable convolution
    (SeparableConv2D); any other value uses a regular Conv2D.
    `kernel` is the number of output filters, `kernel_size` the spatial size.
    """
    conv_cls = (tf.keras.layers.SeparableConv2D if conv_type == 'ds'
                else tf.keras.layers.Conv2D)
    out = conv_cls(kernel, kernel_size, padding=padding, strides=strides)(inputs)
    out = tf.keras.layers.BatchNormalization()(out)
    if relu:
        out = tf.keras.layers.ReLU()(out)
    return out
"""## Step 2: Global Feature Extractor
#### residual custom method
"""
def _res_bottleneck(inputs, filters, kernel, t, s, r=False):
    """MobileNetV2-style inverted residual bottleneck.

    Expand channels by factor `t` with a 1x1 conv, apply a depthwise
    `kernel` conv with stride `s`, then project down to `filters` with a
    1x1 conv (no ReLU). When `r` is True the input is added back as a
    residual skip (valid only when shapes match, i.e. s == 1 and
    filters == input channels).
    """
    expanded_channels = tf.keras.backend.int_shape(inputs)[-1] * t
    out = conv_block(inputs, 'conv', expanded_channels, (1, 1), strides=(1, 1))
    out = tf.keras.layers.DepthwiseConv2D(kernel, strides=(s, s),
                                          depth_multiplier=1, padding='same')(out)
    out = tf.keras.layers.BatchNormalization()(out)
    out = tf.keras.layers.ReLU()(out)
    out = conv_block(out, 'conv', filters, (1, 1), strides=(1, 1),
                     padding='same', relu=False)
    if r:
        out = tf.keras.layers.add([out, inputs])
    return out
"""#### Bottleneck custom method"""
def bottleneck_block(inputs, filters, kernel, t, strides, n):
    """Stack of `n` inverted residual bottlenecks.

    Only the first block applies `strides`; the remaining n-1 use stride 1
    and a residual skip connection.
    """
    out = _res_bottleneck(inputs, filters, kernel, t, strides)
    for _ in range(n - 1):
        out = _res_bottleneck(out, filters, kernel, t, 1, True)
    return out
"""#### PPM Method"""
def pyramid_pooling_block(input_tensor, bin_sizes):
    """Pyramid pooling module (PPM).

    For each bin size: average-pool the input into a bin_size x bin_size
    grid, convolve, resize back to the 16x16 feature-map size, and finally
    concatenate all branches with the original input along channels.

    NOTE(review): w/h are hard-coded to 16 — this assumes the 512x512 input
    after the stride-32 downsampling path; confirm if the resolution changes.
    """
    height = width = 16
    branches = [input_tensor]
    for bin_size in bin_sizes:
        pool = (width // bin_size, height // bin_size)
        branch = tf.keras.layers.AveragePooling2D(pool_size=pool, strides=pool)(input_tensor)
        branch = tf.keras.layers.Conv2D(128, 3, 2, padding='same')(branch)
        branch = tf.keras.layers.Lambda(lambda t: tf.image.resize(t, (width, height)))(branch)
        branches.append(branch)
    return tf.keras.layers.concatenate(branches)
def model(pretrained_weights=None, classes = 4):
    """Build and compile the Fast-SCNN segmentation network.

    Input: a flattened 512*512*3 image vector (reshaped internally).
    Output: a flattened 512*512*classes vector of per-pixel class
    probabilities.

    Parameters
    ----------
    pretrained_weights : str, optional
        Path to a weights file loaded after compilation.
    classes : int
        Number of segmentation classes.

    Returns
    -------
    tf.keras.Model
        Compiled model (categorical crossentropy, RMSprop, dice/jaccard metrics).
    """
    # Step 1: learning to downsample (overall stride 8).
    input_layer = tf.keras.layers.Input(shape=(512 * 512 * 3,), name='input_layer')
    reshape = tf.keras.layers.Reshape((512, 512, 3))(input_layer)
    lds_layer = conv_block(reshape, 'conv', 32, (3, 3), strides=(2, 2))
    lds_layer = conv_block(lds_layer, 'ds', 48, (3, 3), strides=(2, 2))
    lds_layer = conv_block(lds_layer, 'ds', 64, (3, 3), strides=(2, 2))
    # Step 2: global feature extractor (down to stride 32) + pyramid pooling.
    gfe_layer = bottleneck_block(lds_layer, 64, (3, 3), t=6, strides=2, n=3)
    gfe_layer = bottleneck_block(gfe_layer, 96, (3, 3), t=6, strides=2, n=3)
    gfe_layer = bottleneck_block(gfe_layer, 128, (3, 3), t=6, strides=1, n=3)
    gfe_layer = pyramid_pooling_block(gfe_layer, [2, 4, 6, 8])
    # Step 3: feature fusion — project the high-res branch (no ReLU),
    # upsample and refine the low-res branch, then add the two.
    ff_layer1 = conv_block(lds_layer, 'conv', 128, (1, 1), padding='same', strides=(1, 1), relu=False)
    ff_layer2 = tf.keras.layers.UpSampling2D((4, 4))(gfe_layer)
    ff_layer2 = tf.keras.layers.DepthwiseConv2D(128, strides=(1, 1), depth_multiplier=1, padding='same')(ff_layer2)
    ff_layer2 = tf.keras.layers.BatchNormalization()(ff_layer2)
    ff_layer2 = tf.keras.layers.ReLU()(ff_layer2)
    ff_layer2 = tf.keras.layers.Conv2D(128, 1, 1, padding='same', activation=None)(ff_layer2)
    ff_final = tf.keras.layers.add([ff_layer1, ff_layer2])
    ff_final = tf.keras.layers.BatchNormalization()(ff_final)
    ff_final = tf.keras.layers.ReLU()(ff_final)
    # Step 4: classifier head — two separable convs, class projection,
    # dropout, upsample back to input resolution.
    classifier = tf.keras.layers.SeparableConv2D(128, (3, 3), padding='same', strides=(1, 1), name='DSConv1_classifier')(ff_final)
    classifier = tf.keras.layers.BatchNormalization()(classifier)
    classifier = tf.keras.layers.ReLU()(classifier)
    classifier = tf.keras.layers.SeparableConv2D(128, (3, 3), padding='same', strides=(1, 1), name='DSConv2_classifier')(classifier)
    classifier = tf.keras.layers.BatchNormalization()(classifier)
    classifier = tf.keras.layers.ReLU()(classifier)
    classifier = conv_block(classifier, 'conv', classes, (1, 1), strides=(1, 1), padding='same', relu=True)
    classifier = tf.keras.layers.Dropout(0.3)(classifier)
    classifier = tf.keras.layers.UpSampling2D((8, 8))(classifier)
    # BUGFIX: with categorical_crossentropy the per-pixel class scores must
    # form a probability distribution over the class channel; the original
    # 'sigmoid' does not normalise across classes and is a plausible cause
    # of the network failing to learn. Softmax over the channel axis fixes it.
    classifier = tf.keras.layers.Conv2D(classes, 1, activation='softmax', padding='same')(classifier)
    classifier = tf.keras.layers.Reshape((512 * 512 * classes,))(classifier)
    # Model compilation.
    fast_scnn = tf.keras.Model(inputs=input_layer, outputs=classifier, name='Fast_SCNN')
    optimizer = tf.keras.optimizers.RMSprop(lr=1e-4)
    fast_scnn.compile(loss='categorical_crossentropy', optimizer=optimizer,
                      metrics=['accuracy', dice_coeff, jaccard_coef, my_dice])
    fast_scnn.summary()
    if pretrained_weights:
        fast_scnn.load_weights(pretrained_weights)
    return fast_scnn
我使用的损失和指标集合(希望有人会觉得它们有用 :))
from tensorflow.keras.losses import binary_crossentropy
import tensorflow.keras.backend as K
import tensorflow as tf
import numpy as np
smooth = 1e-12
def dice_coeff(y_true, y_pred):
    """Soft Dice coefficient over all elements (tensors are flattened),
    smoothed by 1 to avoid division by zero on empty masks."""
    smooth = 1.
    true_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(true_flat * pred_flat)
    total = K.sum(true_flat) + K.sum(pred_flat)
    return (2. * overlap + smooth) / (total + smooth)
def my_dice(y_true, y_pred):
    """Soft Dice computed along the last axis (per sample/row)."""
    eps = 1e-6
    overlap = K.sum(y_true * y_pred, axis=-1)
    totals = K.sum(y_true, axis=-1) + K.sum(y_pred, axis=-1)
    return (2. * overlap + eps) / (totals + eps)
def my_dice_loss(y_true, y_pred):
    """Loss form of my_dice: 1 - dice."""
    dice = my_dice(y_true, y_pred)
    return 1 - dice
def dice_loss(y_true, y_pred):
    """Loss form of dice_coeff: 1 - dice."""
    return 1 - dice_coeff(y_true, y_pred)
def bce_dice_loss(y_true, y_pred):
    """Binary cross-entropy plus dice loss (common segmentation combo)."""
    bce = binary_crossentropy(y_true, y_pred)
    dice = dice_loss(y_true, y_pred)
    return bce + dice
# weighted losses not tested
def weighted_dice_coeff(y_true, y_pred, weight):
    """Dice coefficient where every term is scaled by weight**2
    (so dice of the weight-scaled masks)."""
    smooth = 1.
    w2 = weight * weight
    overlap = y_true * y_pred
    numerator = 2. * K.sum(w2 * overlap) + smooth
    denominator = K.sum(w2 * y_true) + K.sum(w2 * y_pred) + smooth
    return numerator / denominator
def weighted_dice_loss(y_true, y_pred):
    """Dice loss with extra weight on mask-border pixels.

    Border pixels are found by average-pooling the ground-truth mask: where
    the pooled value is strictly between 0.005 and 0.995 the pooling window
    straddles an object edge. Border pixels get 3x weight, and the weight
    map is renormalised so the total weight equals the unweighted total.
    """
    y_true = K.cast(y_true, 'float32')
    y_pred = K.cast(y_pred, 'float32')
    # if we want to get same size of output, kernel size must be odd number
    # NOTE(review): 256 and 512 both map to kernel 21 while 1024 maps to 41;
    # possibly intentional, but 512 -> 41 would be the natural progression —
    # confirm with the author.
    if K.int_shape(y_pred)[1] == 128:
        kernel_size = 11
    elif K.int_shape(y_pred)[1] == 256:
        kernel_size = 21
    elif K.int_shape(y_pred)[1] == 512:
        kernel_size = 21
    elif K.int_shape(y_pred)[1] == 1024:
        kernel_size = 41
    else:
        raise ValueError('Unexpected image size')
    # Sliding-window mean of the mask: interior/background windows pool to
    # ~1/~0; edge-straddling windows land strictly in between.
    averaged_mask = K.pool2d(
        y_true, pool_size=(kernel_size, kernel_size), strides=(1, 1), padding='same', pool_mode='avg')
    border = K.cast(K.greater(averaged_mask, 0.005), 'float32') * K.cast(K.less(averaged_mask, 0.995), 'float32')
    weight = K.ones_like(averaged_mask)
    w0 = K.sum(weight)
    weight += border * 2  # border pixels now carry weight 3
    w1 = K.sum(weight)
    weight *= (w0 / w1)  # renormalise so the total weight is unchanged
    loss = 1 - weighted_dice_coeff(y_true, y_pred, weight)
    return loss
def weighted_bce_loss(y_true, y_pred, weight):
    """Pixel-weighted binary cross-entropy, evaluated on logits in the
    numerically stable form (see the linked TF docs); the sum is normalised
    by the total weight rather than the pixel count."""
    # avoiding overflow
    epsilon = 1e-7
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    # Recover logits from probabilities so the stable formulation applies.
    logit_y_pred = K.log(y_pred / (1. - y_pred))
    # https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits
    loss = (1. - y_true) * logit_y_pred + (1. + (weight - 1.) * y_true) * \
        (K.log(1. + K.exp(-K.abs(logit_y_pred))) + K.maximum(-logit_y_pred, 0.))
    return K.sum(loss) / K.sum(weight)
def weighted_bce_dice_loss(y_true, y_pred):
    """Border-weighted BCE plus border-weighted dice loss.

    Builds the same border weight map as weighted_dice_loss (pixels whose
    pooled mask value lies strictly between 0.005 and 0.995 get 3x weight,
    renormalised to the unweighted total), then sums the weighted BCE and
    weighted dice losses.
    """
    y_true = K.cast(y_true, 'float32')
    y_pred = K.cast(y_pred, 'float32')
    # if we want to get same size of output, kernel size must be odd number
    # NOTE(review): same kernel table as weighted_dice_loss, including the
    # suspicious 256/512 -> 21 duplication — confirm with the author.
    if K.int_shape(y_pred)[1] == 128:
        kernel_size = 11
    elif K.int_shape(y_pred)[1] == 256:
        kernel_size = 21
    elif K.int_shape(y_pred)[1] == 512:
        kernel_size = 21
    elif K.int_shape(y_pred)[1] == 1024:
        kernel_size = 41
    else:
        raise ValueError('Unexpected image size')
    # Sliding-window mean of the mask marks edge-straddling windows.
    averaged_mask = K.pool2d(
        y_true, pool_size=(kernel_size, kernel_size), strides=(1, 1), padding='same', pool_mode='avg')
    border = K.cast(K.greater(averaged_mask, 0.005), 'float32') * K.cast(K.less(averaged_mask, 0.995), 'float32')
    weight = K.ones_like(averaged_mask)
    w0 = K.sum(weight)
    weight += border * 2  # border pixels now carry weight 3
    w1 = K.sum(weight)
    weight *= (w0 / w1)  # renormalise so the total weight is unchanged
    loss = weighted_bce_loss(y_true, y_pred, weight) + (1 - weighted_dice_coeff(y_true, y_pred, weight))
    return loss
def jaccard_coef(y_true, y_pred):
    """Soft Jaccard (IoU) coefficient, averaged over the remaining axis;
    sums run over axes 0, -1 and -2. Uses the module-level `smooth`."""
    # __author__ = Vladimir Iglovikov
    overlap = K.sum(y_true * y_pred, axis=[0, -1, -2])
    union = K.sum(y_true + y_pred, axis=[0, -1, -2]) - overlap
    return K.mean((overlap + smooth) / (union + smooth))
def jaccard_coef_loss(y_true, y_pred):
    """Jaccard loss: 1 - mean soft IoU.

    Delegates to jaccard_coef instead of duplicating its body, so the two
    definitions cannot drift apart (the original repeated the computation
    verbatim).
    """
    # __author__ = Vladimir Iglovikov
    return 1 - jaccard_coef(y_true, y_pred)
def jaccard_coef_int(y_true, y_pred):
    """Jaccard where predictions are first clipped and rounded to {0, 1}.

    NOTE: the union term still uses the un-rounded y_pred, exactly as in
    the original implementation.
    """
    # __author__ = Vladimir Iglovikov
    pred_binary = K.round(K.clip(y_pred, 0, 1))
    overlap = K.sum(y_true * pred_binary, axis=[0, -1, -2])
    total = K.sum(y_true + y_pred, axis=[0, -1, -2])
    return K.mean((overlap + smooth) / (total - overlap + smooth))
def jaccard_distance(y_true, y_pred, smooth=100):
    """Jaccard distance (intersection-over-union loss) for segmentation.

    Gives all classes equal weight, which helps with unbalanced pixel
    counts; the result is scaled by `smooth` so it converges on 0 with a
    smooth gradient (avoiding exploding/vanishing gradients near zero).

    Jaccard = |X & Y| / (|X| + |Y| - |X & Y|)
            = sum(|A*B|) / (sum(|A|) + sum(|B|) - sum(|A*B|))

    # Arguments
        y_true: The ground truth tensor.
        y_pred: The predicted tensor.
        smooth: Smoothing factor. Default is 100.
    # Returns
        The Jaccard distance between the two tensors.
    # References
        - [What is a good evaluation measure for semantic segmentation?](
           http://www.bmva.org/bmvc/2013/Papers/paper0032/paper0032.pdf)
    """
    overlap = K.sum(K.abs(y_true * y_pred), axis=-1)
    total = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
    jac = (overlap + smooth) / (total - overlap + smooth)
    return (1 - jac) * smooth
def tversky_loss(y_true, y_pred, alpha=0.3, beta=0.7, smooth=1e-10):
    """ Tversky loss function.

    BUGFIX: the original returned the Tversky *index* (a similarity in
    [0, 1]) even though the name and docstring promise a loss; minimising
    it would drive the overlap to zero. The loss is 1 - index.

    Parameters
    ----------
    y_true : keras tensor
        tensor containing target mask.
    y_pred : keras tensor
        tensor containing predicted mask.
    alpha : float
        real value, weight of '0' class (false positives).
    beta : float
        real value, weight of '1' class (false negatives).
    smooth : float
        small real value used for avoiding division by zero error.

    Returns
    -------
    keras tensor
        tensor containing tversky loss.
    """
    y_true = K.flatten(y_true)
    y_pred = K.flatten(y_pred)
    truepos = K.sum(y_true * y_pred)
    fp_and_fn = alpha * K.sum(y_pred * (1 - y_true)) + beta * K.sum((1 - y_pred) * y_true)
    tversky_index = (truepos + smooth) / ((truepos + smooth) + fp_and_fn)
    return 1 - tversky_index
def focal_loss(gamma=2., alpha=.25):
    """Factory returning a binary focal loss with the given focusing
    parameter `gamma` and class-balance weight `alpha`.

    BUGFIX/robustness: predictions are clipped away from 0 and 1 before
    the logs — the original could evaluate K.log(0) (prediction saturated
    at exactly 0 or 1) and produce NaN losses.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Keep log() arguments strictly inside (0, 1).
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)
        # pt_1: predictions at positive pixels (1 elsewhere, contributing 0).
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        # pt_0: predictions at negative pixels (0 elsewhere, contributing 0).
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
               - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed
# can't find out why cause error with axes
def soft_dice_loss(y_true, y_pred, epsilon=1e-6):
    '''
    Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions.
    Assumes the `channels_last` format.

    BUGFIX: the original used np.sum/np.mean, which cannot operate on
    symbolic Keras tensors — the "error with axes" the original comment
    reported. Rewritten with the Keras backend (K), already imported at the
    top of this module.

    # Arguments
        y_true: b x X x Y( x Z...) x c One hot encoding of ground truth
        y_pred: b x X x Y( x Z...) x c Network output, must sum to 1 over c channel (such as after softmax)
        epsilon: Used for numerical stability to avoid divide by zero errors
    # References
        V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation
        https://arxiv.org/abs/1606.04797
        More details on Dice loss formulation
        https://mediatum.ub.tum.de/doc/1395260/1395260.pdf (page 72)
        Adapted from https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022
    '''
    # Reduce over batch and spatial axes, keeping only the class axis.
    # (NOTE(review): the original comment said "skip the batch and class
    # axis" but the computed axes include axis 0 (batch); behavior kept.)
    axes = tuple(range(K.ndim(y_pred) - 1))
    numerator = 2. * K.sum(y_pred * y_true, axis=axes)
    denominator = K.sum(K.square(y_pred) + K.square(y_true), axis=axes)
    # Average over classes.
    return 1 - K.mean(numerator / (denominator + epsilon))