Question

我正在尝试在Keras中训练模型进行图像分割（U-Net），然后首先生成两个包含我的训练（和验证）图像和遮罩的列表。然后，我按如下所示训练了模型。

x_train_val = # list of images (nr_images, 256, 256, 3)
y_train_val = # list of masks (nr_images, 256, 256, 1)

# Define model
def standard_unet():
    inputs = Input((img_size, img_size, 3))

    s = Lambda(lambda x: x / 255) (inputs)

    c1 = Conv2D(8, (3, 3), activation='relu', padding='same') (inputs)
    c1 = Conv2D(8, (3, 3), activation='relu', padding='same') (c1)
    p1 = MaxPooling2D((2, 2)) (c1)

    c2 = Conv2D(16, (3, 3), activation='relu', padding='same') (p1)
    c2 = Conv2D(16, (3, 3), activation='relu', padding='same') (c2)
    p2 = MaxPooling2D((2, 2)) (c2)

    c3 = Conv2D(32, (3, 3), activation='relu', padding='same') (p2)
    c3 = Conv2D(32, (3, 3), activation='relu', padding='same') (c3)
    p3 = MaxPooling2D((2, 2)) (c3)

    c4 = Conv2D(64, (3, 3), activation='relu', padding='same') (p3)
    c4 = Conv2D(64, (3, 3), activation='relu', padding='same') (c4)
    p4 = MaxPooling2D(pool_size=(2, 2)) (c4)

    c5 = Conv2D(128, (3, 3), activation='relu', padding='same') (p4)
    c5 = Conv2D(128, (3, 3), activation='relu', padding='same') (c5)

    u6 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c5)
    u6 = concatenate([u6, c4])
    c6 = Conv2D(64, (3, 3), activation='relu', padding='same') (u6)
    c6 = Conv2D(64, (3, 3), activation='relu', padding='same') (c6)

    u7 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same') (c6)
    u7 = concatenate([u7, c3])
    c7 = Conv2D(32, (3, 3), activation='relu', padding='same') (u7)
    c7 = Conv2D(32, (3, 3), activation='relu', padding='same') (c7)

    u8 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same') (c7)
    u8 = concatenate([u8, c2])
    c8 = Conv2D(16, (3, 3), activation='relu', padding='same') (u8)
    c8 = Conv2D(16, (3, 3), activation='relu', padding='same') (c8)

    u9 = Conv2DTranspose(8, (2, 2), strides=(2, 2), padding='same') (c8)
    u9 = concatenate([u9, c1], axis=3)
    c9 = Conv2D(8, (3, 3), activation='relu', padding='same') (u9)
    c9 = Conv2D(8, (3, 3), activation='relu', padding='same') (c9)

    outputs = Conv2D(1, (1, 1), activation='sigmoid') (c9)

    model = Model(inputs=[inputs], outputs=[outputs])

    return model

# IoU metric
def mean_iou(y_true, y_pred):
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        y_pred_ = tf.to_int32(y_pred > t)
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        K.get_session().run(tf.local_variables_initializer())
        with tf.control_dependencies([up_opt]):
            score = tf.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)

# Dice coef loss
def dice_coef(y_true, y_pred):
    smooth = 1.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def bce_dice_loss(y_true, y_pred):
    return 0.5 * binary_crossentropy(y_true, y_pred) - dice_coef(y_true, y_pred)

# Model compiling
K.clear_session()
model = standard_unet()
model.compile(optimizer='adam', loss=bce_dice_loss, metrics=[mean_iou])

# Fitting
model.fit(x_train_val, y_train_val, validation_split=0.1, epochs=20)

这完全符合预期，当我尝试对测试图像进行预测时，我得到了不错的结果。当我想增加训练图像的数量时，我尝试通过以下功能使用ImageDataGenerator和train_generator。

# Runtime data augmentation
def get_train_test_augmented(x_data=x_train_val, y_data=y_train_val, validation_split=0.1, batch_size=32):
    x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data,
                                                          train_size=1-validation_split,
                                                          test_size=validation_split)

    data_gen_args = dict(rotation_range=45.,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='reflect')  #use 'constant'??

    x_datagen = ImageDataGenerator(**data_gen_args)
    y_datagen = ImageDataGenerator(**data_gen_args)
    x_datagen.fit(x_train, augment=True)
    y_datagen.fit(y_train, augment=True)
    x_train_augmented = x_datagen.flow(x_train, batch_size=batch_size, shuffle=True)
    y_train_augmented = y_datagen.flow(y_train, batch_size=batch_size, shuffle=True)

    # combine generators into one which yields image and masks
    train_generator = zip(x_train_augmented, y_train_augmented)

    return train_generator

对这些图像的目视检查显示它们包含了我所期望的（增强的图像和蒙版）。但是，当我现在拟合模型时，我的预测总是空白。

train_generator = get_train_test_augmented()
model.fit_generator(train_generator, epochs=20)

有没有人遇到过空白图像预测方面的相同问题或知道如何解决？谢谢，BBQuercus。

Answer 1

您将分别使用图像生成器和蒙版生成器来进行图像生成，将会发生的情况是，输入图像和标签（蒙版）的随机变换将不同。不仅如此，您还需要对两个生成器进行改组，因此它们甚至彼此不对应（图像和蒙版在生成器中对应）。

This github issue comment也谈到了这一点，并建议创建一个额外的生成器来合并两者。

请尝试为两个生成器使用相同的种子作为种子，然后查看其是否发生任何变化。

编辑

在进行图像去噪时，我注意到使用use_multiprocessing=True的解决方案不是最优的，因为在拟合时您无法使用class MergedGenerators(Sequence): def __init__(self, *generators): self.generators = generators # TODO add a check to verify that all generators have the same length def __len__(self): return len(self.generators[0]) def __getitem__(self, index): return [generator[index] for generator in self.generators] train_generator = MergedGenerators(image_generator, mask_generator)。一种解决方案是实施自定义生成器合并：

import pytesseract
import cv2
import numpy as np

img  = cv2.imread('M.jpeg')
img=cv2.resize(img,(400,200))

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15,7)

cv2.imshow('f',thresh)

text=pytesseract.image_to_string(img)
print(text)

Keras fit_generator与fit预测不同

1 个答案: