简单的CAE(卷积自编码器)似乎不适用于Carvana数据集。我正在Carvana数据集上尝试一个简单的CAE,数据集可以从这里(here)下载。
我的代码如下:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.transform import downscale_local_mean
from skimage.color import rgb2grey
from os.path import join, isfile
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
from keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, Input, concatenate
from keras.models import Model
from keras.callbacks import ModelCheckpoint
import keras.backend as K
from scipy.ndimage.filters import gaussian_filter
from keras.optimizers import Adam
from random import randint
import hickle as hkl
import dill
class Data(object):
    """Plain container that keeps two related objects together.

    Args:
        X: stored unchanged as ``self.X``.
        Y: stored unchanged as ``self.Y``.

    The value passed as ``Y`` is also reachable through the lower-case
    name ``y`` so that both spellings refer to the same object.
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    @property
    def y(self):
        """Lower-case alias for ``Y``."""
        return self.Y

    @y.setter
    def y(self, value):
        # Keep the alias writable so `obj.y = ...` still works and stays
        # in sync with `obj.Y`.
        self.Y = value
# Location of the dataset relative to the working directory.
input_folder = join('..', 'input')
print('Path:', input_folder)

# File checked for previously prepared arrays before rebuilding them.
data_file_name = 'datafile.pkl'

# Only the `img` column (file names) of the mask listing is needed.
df_mask = pd.read_csv(join(input_folder, 'train_masks.csv'), usecols=['img'])


def load_img(im, idx):
    """Read the JPEG for id `im` and two-digit index `idx` from `train`."""
    file_name = '{}_{:02d}.jpg'.format(im, idx)
    return imread(join(input_folder, 'train', file_name))


def load_mask(im, idx):
    """Read the GIF mask for id `im` and two-digit index `idx` from `train_masks`."""
    file_name = '{}_{:02d}_mask.gif'.format(im, idx)
    return imread(join(input_folder, 'train_masks', file_name))


# Unique ids: the part of each file name before the first underscore.
ids_train = df_mask['img'].map(lambda s: s.partition('_')[0]).unique()

# Indices 1..16 used in the file names.
imgs_idx = list(range(1, 17))


def resize(im):
    """Downscale `im` by 4 along its first two axes using local means."""
    factors = (4, 4) if im.ndim == 2 else (4, 4, 1)
    return downscale_local_mean(im, factors)


def mask_image(im, mask):
    """Multiply `im` by `mask` after giving the mask a trailing axis."""
    return im * np.expand_dims(mask, 2)


num_train = 48  # len(ids_train)
# Build (or reload) the input images X and the target masks y.
if isfile(data_file_name):
    # A prepared data file exists: reuse it instead of re-reading the images.
    data = hkl.load(data_file_name)
    X = data.X
    y = data.y
else:
    X = np.empty((num_train, 320, 480, 1), dtype=np.float32)
    y = np.empty((num_train, 320, 480, 1), dtype=np.float32)
    idx = 1  # Rotation index
    with tqdm_notebook(total=num_train) as bar:
        for i, img_id in enumerate(ids_train[:num_train]):
            # Only rotation `idx` is used, so read and downscale just that
            # image instead of all 16 rotations per id.
            greyscale = rgb2grey(resize(load_img(img_id, idx))) / 255
            X[i] = np.expand_dims(greyscale, 2)
            # The mask gets a trailing channel axis before downscaling and
            # is divided by 255 like the image.
            y[i] = resize(np.expand_dims(load_mask(img_id, idx), 2)) / 255.
            bar.update()
    # Saving the prepared arrays is currently disabled:
    #data = Data(X, y)
    #hkl.dump(data, data_file_name)
# Hold out 20% of the samples for validation, using a fixed random_state.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=43
)

# Per-position statistics of the training targets across samples (axis 0),
# joined along axis 2.
y_train_mean = np.mean(y_train, axis=0)
y_train_std = np.std(y_train, axis=0)
y_train_min = np.min(y_train, axis=0)
y_features = np.concatenate((y_train_mean, y_train_std, y_train_min), axis=2)
# Network input: 320x480 single-channel images.
inp = Input(shape=(320, 480, 1))

# Convolution / pooling stages; each pooling halves both spatial dimensions.
conv1 = Conv2D(filters=64, kernel_size=3, activation='relu', padding='same')(inp)
max1 = MaxPooling2D(pool_size=2)(conv1)
conv2 = Conv2D(filters=48, kernel_size=5, activation='relu', padding='same')(max1)
max2 = MaxPooling2D(pool_size=2)(conv2)
conv3 = Conv2D(filters=32, kernel_size=7, activation='relu', padding='same')(max2)

# Transposed convolutions bring conv3 (stride 4) and conv2 (stride 2) back to
# the input resolution.
deconv3 = Conv2DTranspose(
    filters=32, kernel_size=7, strides=4, activation='relu', padding='same'
)(conv3)
deconv2 = Conv2DTranspose(
    filters=48, kernel_size=5, strides=2, activation='relu', padding='same'
)(conv2)

# conv1 is concatenated directly with the two upsampled branches, then a
# single sigmoid convolution produces the one-channel output.
deconvs = concatenate([conv1, deconv2, deconv3])
out = Conv2D(filters=1, kernel_size=7, activation='sigmoid', padding='same')(deconvs)

model = Model(inputs=inp, outputs=out)
model.summary()
# Added to numerator and denominator of the Dice ratio below.
smooth = 1.


# From here: https://github.com/jocicmarko/ultrasound-nerve-segmentation/blob/master/train.py
def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient of `y_true` and `y_pred`.

    Both tensors are flattened; the result is
    (2 * sum(true * pred) + smooth) / (sum(true) + sum(pred) + smooth).
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator
def bce_dice_loss(y_true, y_pred):
    """Return half the binary cross-entropy minus the Dice coefficient."""
    bce = K.binary_crossentropy(y_true, y_pred)
    dice = dice_coef(y_true, y_pred)
    return 0.5 * bce - dice
# --- Stage 1: pre-train as an autoencoder (the input is also the target). ---
# NOTE(review): the targets here are the greyscale inputs rather than the
# masks; if Keras resolves 'accuracy' to binary accuracy (rounded prediction
# == target), it may not be a meaningful metric for this stage — confirm
# against the Keras version in use.
model.compile(
    optimizer=Adam(lr=0.0001),
    loss=bce_dice_loss,
    metrics=['accuracy', dice_coef],
)
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(
    filepath=cae_filepath,
    monitor='val_dice_coef',
    verbose=2,
    save_best_only=True,
    mode='max',
)
pre_history = model.fit(
    x=X_train,
    y=X_train,
    epochs=1000,
    validation_data=(X_val, X_val),
    batch_size=22,
    verbose=2,
    callbacks=[pre_mcp],
)

# --- Stage 2: recompile, restore the best pre-trained weights, and train ---
# --- on the masks.                                                       ---
model.compile(
    optimizer=Adam(lr=0.0001),
    loss=bce_dice_loss,
    metrics=['accuracy', dice_coef],
)
model.load_weights(cae_filepath)
filepath = "weights-improvement2_lre-5-{epoch:02d}-{val_acc:.5f}-{val_dice_coef:.5f}.hdf5"
mcp = ModelCheckpoint(
    filepath=filepath,
    monitor='val_dice_coef',
    verbose=2,
    save_best_only=True,
    mode='max',
)
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=1000,
    validation_data=(X_val, y_val),
    batch_size=22,
    verbose=2,
    callbacks=[mcp],
)
# Inspect three validation samples: the first, the middle one and a random one.
# `//` keeps the middle index an integer; with `/` Python 3 produces a float,
# which cannot be used to index the array.
idxs = [0, X_val.shape[0] // 2, randint(1, X_val.shape[0] - 1)]
for idx in idxs:
    print('Index:', idx)
    x = X_val[idx]
    fig, ax = plt.subplots(3, 3, figsize=(16, 16))
    ax = ax.ravel()

    # One panel per input channel, starting from the first axis.
    for i in range(x.shape[-1]):
        ax[i].imshow(x[..., i], cmap='gray')
        ax[i].set_title('channel {}'.format(i))

    # Target followed by the raw model output.
    ax[-8].imshow(y_val[idx, ..., 0], cmap='gray')
    ax[-8].set_title('y')
    y_pred = model.predict(x[None]).squeeze()
    ax[-7].imshow(y_pred, cmap='gray')
    ax[-7].set_title('y_pred')

    # Last six panels: prediction smoothed with gaussian_filter (second
    # argument 1..6) and thresholded at 0.5; the title is that argument.
    for sigma in range(1, 7):
        panel = ax[sigma - 7]
        panel.imshow(gaussian_filter(y_pred, sigma) > 0.5, cmap='gray')
        panel.set_title(str(sigma))
不做预训练时模型可以正常工作,你可以注释掉下面这些行来验证:
# Pre-training stage: compile, fit the model to reproduce its own input
# (X_train -> X_train) while checkpointing the best val_dice_coef, then
# recompile and reload the checkpointed weights.
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
cae_filepath = "cae_375.hdf5"
pre_mcp = ModelCheckpoint(cae_filepath, monitor='val_dice_coef', verbose=2, save_best_only=True, mode='max')
pre_history = model.fit(X_train, X_train, epochs=1000, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])
model.compile(Adam(lr=0.0001), bce_dice_loss, metrics=['accuracy', dice_coef])
model.load_weights(cae_filepath)
然而,当我预训练自编码器来重建原始图像时,准确率(accuracy)没有任何提升,只有Dice系数有所提高: 此外,当我在预训练好的自编码器基础上继续训练、在训练数据上进行预测时,得到了不同的结果——准确率停留在0.8374,而Dice系数从最初的0.11864下降到7.5781e-04:
用自编码器对模型进行预训练应该能提高模型的准确率。根据我的经验,在完整的MNIST数据集上使用简单的CAE时,准确率提高到了99.62%
另外,我检查了数据,确认两种情况下数据的性质相同(调试时可以通过代码中的临时变量查看)
对于第二种情况,我的猜测是:我们加载的不仅有编码器的权重,还有解码器的权重,这可能会在训练过程中引起问题
重置解码器的权重后,有一段时间情况几乎相同: 但经过49次迭代后出现了转折,训练过程开始变得有效:
然而,我不明白为什么在自编码器训练期间,尽管Dice系数有所改善,准确率却没有提高;可能是我的代码或框架出了问题
其他信息:
我的环境:
非常感谢任何建议