简单的CAE问题

时间:2017-10-24 22:55:53

标签: python keras theano autoencoder

看起来简单的CAE不适用于Carvana数据集 我正在为Carvana数据集尝试简单的CAE。您可以下载here

我的代码如下:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.transform import downscale_local_mean
from skimage.color import rgb2grey
from os.path import join, isfile
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
from keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, Input, concatenate
from keras.models import Model
from keras.callbacks import ModelCheckpoint
import keras.backend as K
from scipy.ndimage.filters import gaussian_filter
from keras.optimizers import Adam
from random import randint
import hickle as hkl
import dill

class Data(object):
    """Lightweight container pairing input images (X) with target masks (Y)."""

    def __init__(self, X, Y):
        # Store the arrays exactly as given; no copying or validation.
        self.X = X
        self.Y = Y

input_folder = join('..', 'input')

print('Path:', input_folder)

# On-disk cache file for the preprocessed arrays (see the load/build branch below).
data_file_name = 'datafile.pkl'

# Only the image-name column is needed; masks are read from the .gif files.
df_mask = pd.read_csv(join(input_folder, 'train_masks.csv'), usecols=['img'])


# PEP 8 (E731): named functions instead of lambda assignments.
def load_img(im, idx):
    """Load rotation `idx` (1-16) of car `im` as an RGB image array."""
    return imread(join(input_folder, 'train', '{}_{:02d}.jpg'.format(im, idx)))


def load_mask(im, idx):
    """Load the binary segmentation mask matching load_img(im, idx)."""
    return imread(join(input_folder, 'train_masks', '{}_{:02d}_mask.gif'.format(im, idx)))


ids_train = df_mask['img'].map(lambda s: s.split('_')[0]).unique()
imgs_idx = list(range(1, 17))  # each car is photographed from 16 rotations


def resize(im):
    """Downscale by 4x per spatial axis via local-mean pooling (grey or RGB)."""
    return downscale_local_mean(im, (4, 4) if im.ndim == 2 else (4, 4, 1))


def mask_image(im, mask):
    """Zero out background pixels of `im` using the 2-D binary `mask`."""
    return im * np.expand_dims(mask, 2)


num_train = 48  # small subset for experimentation; use len(ids_train) for the full set

if isfile(data_file_name):
    # Fast path: reuse the arrays cached by a previous run.
    data = hkl.load(data_file_name)
    X = data.X
    y = data.Y  # bug fix: the Data container stores the masks as .Y, not .y

else:
    X = np.empty((num_train, 320, 480, 1), dtype=np.float32)
    y = np.empty((num_train, 320, 480, 1), dtype=np.float32)

    with tqdm_notebook(total=num_train) as bar:
        idx = 1  # rotation index: use only the first of the 16 views per car
        for i, img_id in enumerate(ids_train[:num_train]):
            imgs_id = [resize(load_img(img_id, j)) for j in imgs_idx]
            # resize() averages uint8 pixels, so values stay in 0-255;
            # rescale to [0, 1] after the greyscale conversion.
            greyscale = rgb2grey(imgs_id[idx - 1]) / 255
            greyscale = np.expand_dims(greyscale, 2)
            X[i] = greyscale
            y_processed = resize(np.expand_dims(load_mask(img_id, idx), 2)) / 255.
            y[i] = y_processed
            del imgs_id  # free memory: 16 full-size views per car add up quickly
            bar.update()

    # Persist the arrays so the isfile() branch above can be taken next time.
    # NOTE(review): verify hickle can serialise the custom Data container.
    data = Data(X, y)
    hkl.dump(data, data_file_name)

# Hold out 20% of the cars for validation; fixed seed keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=43)

# Per-pixel summary statistics of the training masks.
y_train_mean = y_train.mean(axis=0)
y_train_std = y_train.std(axis=0)
y_train_min = y_train.min(axis=0)

# Stack the three statistic maps channel-wise into an (H, W, 3) array.
y_features = np.concatenate((y_train_mean, y_train_std, y_train_min), axis=2)

# Encoder: three conv stages at full, 1/2 and 1/4 spatial resolution.
inp = Input((320, 480, 1))
enc1 = Conv2D(64, 3, activation='relu', padding='same')(inp)
pool1 = MaxPooling2D(2)(enc1)
enc2 = Conv2D(48, 5, activation='relu', padding='same')(pool1)
pool2 = MaxPooling2D(2)(enc2)
enc3 = Conv2D(32, 7, activation='relu', padding='same')(pool2)

# Decoder: transpose-convolve the two deeper stages back to full resolution
# and fuse them with the full-resolution encoder features (skip connection).
up3 = Conv2DTranspose(32, 7, strides=4, activation='relu', padding='same')(enc3)
up2 = Conv2DTranspose(48, 5, strides=2, activation='relu', padding='same')(enc2)
fused = concatenate([enc1, up2, up3])

# Single-channel sigmoid head produces the per-pixel mask probability.
out = Conv2D(1, 7, activation='sigmoid', padding='same')(fused)

model = Model(inp, out)
model.summary()

# Smoothing term keeps the Dice ratio defined (and its gradient finite)
# when both masks are empty.
smooth = 1.

# Adapted from: https://github.com/jocicmarko/ultrasound-nerve-segmentation/blob/master/train.py
def dice_coef(y_true, y_pred):
    """Soft (differentiable) Dice coefficient between two mask tensors."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    union = K.sum(truth) + K.sum(pred)
    return (2. * overlap + smooth) / (union + smooth)


def bce_dice_loss(y_true, y_pred):
    """Combined loss: weighted binary cross-entropy minus the Dice coefficient.

    Minimising this drives BCE down while pushing the Dice overlap up.
    """
    # NOTE(review): in Keras 2.0.8 the backend signature is
    # binary_crossentropy(output, target) -- prediction FIRST; the argument
    # order was flipped to (target, output) only in Keras 2.1.0. The original
    # call passed y_true in the prediction slot, silently corrupting the loss.
    return 0.5 * K.binary_crossentropy(y_pred, y_true) - dice_coef(y_true, y_pred)

# Stage 1: pre-train the whole network as an autoencoder (inputs as targets).
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
# Checkpoint only the best validation Dice score seen so far.
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])

# Stage 2: re-compile (which resets optimizer state) and fine-tune on the
# segmentation masks, starting from the best autoencoder weights saved above.
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])

model.load_weights(cae_filepath)

# Checkpoint filename embeds epoch, val accuracy and val Dice for bookkeeping.
filepath="weights-improvement2_lre-5-{epoch:02d}-{val_acc:.5f}-{val_dice_coef:.5f}.hdf5"
mcp = ModelCheckpoint(filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')

history = model.fit(X_train, y_train, epochs=1000, validation_data=(X_val, y_val), batch_size=22, verbose=2, callbacks=[mcp])

# Inspect predictions on three validation samples: the first, the middle one,
# and a random one. Integer division (//) keeps the index an int on Python 3
# as well (the original `/ 2` only worked under Python 2's int division).
idxs = [0, X_val.shape[0] // 2, randint(1, X_val.shape[0] - 1)]

for idx in idxs:
    print('Index:', idx)
    x = X_val[idx]

    fig, ax = plt.subplots(3, 3, figsize=(16, 16))
    ax = ax.ravel()

    cmaps = ['Reds', 'Greens', 'Blues']
    # Input channels (a single greyscale channel here).
    for i in range(x.shape[-1]):
        ax[i].imshow(x[..., i], cmap='gray')  # use cmaps[i % 3] for RGB input
        ax[i].set_title('channel {}'.format(i))

    # Ground-truth mask.
    ax[-8].imshow(y_val[idx, ..., 0], cmap='gray')
    ax[-8].set_title('y')

    # Raw network prediction.
    y_pred = model.predict(x[None]).squeeze()
    ax[-7].imshow(y_pred, cmap='gray')
    ax[-7].set_title('y_pred')

    # Thresholded predictions after Gaussian blurs of increasing strength;
    # replaces six copy-pasted plotting blocks from the original.
    for k, sigma in enumerate(range(1, 7)):
        ax[-6 + k].imshow(gaussian_filter(y_pred, sigma) > 0.5, cmap='gray')
        ax[-6 + k].set_title(str(sigma))

没有预训练时模型可以正常训练,你可以通过注释掉以下几行来验证这一点:

model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])

model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])

model.load_weights(cae_filepath)

然而,当我尝试先用自动编码器重建原始图像进行预训练时,准确率没有提升,只有 Dice 系数有所改善。此外,当我用预训练好的自动编码器继续训练并在训练数据上进行预测时,得到了不同的结果——准确率停留在 0.8374,Dice 系数从最初的 0.11864 下降到 7.5781e-04:enter image description here enter image description here

用自动编码器预训练模型应该能提高模型精度。根据我的经验,在完整的 MNIST 数据集上,用简单的 CAE 预训练可以将精度提升至 99.62%。

另外,我查看数据以确保两种情况的性质相同(您可以通过临时变量来查看它以在代码中调试它)

在第二种情况下,我有一个想法,它可能是由于这个事实造成的,我们不仅有编码器,还有解码器的权重,它可能在训练期间引起问题

重置解码器的权重后,有一段时间得到的图片几乎没有变化:enter image description here 但经过 49 次迭代后,过程到达了临界点,训练开始变得有效:enter image description here

然而,我不清楚为什么在自动编码器训练期间,尽管 Dice 系数有所改善,准确率却没有提升——可能是我的代码有问题,也可能是框架的问题。

其他信息:

我的环境:

  • Ubuntu 16.04
  • Python 2.7
  • Theano 0.10
  • Keras 2.0.8

enter image description here

结构: enter image description here

任何建议都将受到赞赏

0 个答案:

没有答案