我训练了一个 MobileNetV2 + LSTM 神经网络做二分类,其结构如下。训练数据和验证数据的类别分布相同,均为 (1950, 1140)。当我使用 0.5 的 Dropout 层时,训练集准确率在两个 epoch 内就达到 100%,但验证准确率始终是 63.11%,这意味着所有验证样本都被分类为 label 0。
如果我删除 Dropout 层,则训练准确率(train acc)和验证准确率(val acc)都约为 63.11%。
这真让我感到困惑。
代码在16.04.1 Ubuntu,python 3.5.6,keras 2.2.4中运行,并以tensorflow-gpu 1.10.0作为后端。
我查看了验证样本的预测值,它们全都类似 [0.5024321, 0.49...]。我还把验证样本当作训练样本、把训练样本当作验证样本来训练,结果仍然相同。另一个让我困惑的是,我用下面的精确率(precision)和召回率(recall)代码进行评估,它们的值与 val_acc 相同。
def recall(y_true, y_pred):
    """Keras metric: micro-averaged recall = TP / (TP + FN).

    Both tensors are clipped to [0, 1] and rounded so the element-wise
    product counts true positives; K.epsilon() guards the zero-positive case.
    """
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return tp / (actual_positives + K.epsilon())
def precision(y_true, y_pred):
    """Keras metric: micro-averaged precision = TP / (TP + FP).

    Mirrors recall() above, but normalizes by the count of *predicted*
    positives; K.epsilon() prevents division by zero.
    """
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return tp / (predicted + K.epsilon())
import yield_generator_v2

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"  # CPU = -1

# Hyper-parameters.
batch_size = 16
epochs = 5
seq_num = 25                                  # frames per input sequence
input_shape = (seq_num, 224, 224, 3)
lr = 1e-04

### model
# ImageNet-pretrained MobileNetV2 backbone, applied per-frame via
# TimeDistributed; note it is left fully trainable here.
mob = MobileNetV2(input_shape=(224, 224, 3), alpha=1.0, include_top=False,
                  weights='imagenet', pooling='avg')
model = Sequential()
model.add(TimeDistributed(mob, input_shape=input_shape))
model.add(LSTM(256, return_sequences=True))
model.add(Flatten())
# FIX: the original Dense(256) had no activation, so it was a purely linear
# layer feeding the (also linear, pre-softmax) output layer and added no
# representational power. Give it a non-linearity.
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

model_multi_gpu = multi_gpu_model(model, gpus=4)
adam = Adam(lr=lr)
model_multi_gpu.compile(loss='categorical_crossentropy', optimizer=adam,
                        metrics=['accuracy'])

# FIX: early stopping previously monitored *training* accuracy ('acc');
# with restore_best_weights=True that restores the most over-fit weights
# (train acc hits 100% while val acc stays at the majority-class rate).
# Monitor validation accuracy instead.
callbacks = [EarlyStopping(monitor='val_acc', patience=10, verbose=0,
                           restore_best_weights=True)]

X = np.load('./X_tr_shuffled.npy')
y = np.load('./y_tr_shuffled.npy')
X_val = np.load('./X_val_shuffled.npy')
y_val = np.load('./y_val_shuffled.npy')

get_data = yield_generator_v2.generator(X, y, batch_size)
get_val = yield_generator_v2.generator(X_val, y_val, batch_size)

# FIX: derive step counts from the data instead of hard-coding 194 for both
# loops — a mismatch silently repeats or skips samples every epoch.
steps_tr = int(np.ceil(len(X) / float(batch_size)))
steps_val = int(np.ceil(len(X_val) / float(batch_size)))

model_multi_gpu.fit_generator(generator=get_data,
                              steps_per_epoch=steps_tr,
                              epochs=epochs,
                              verbose=1,
                              validation_data=get_val,
                              validation_steps=steps_val,
                              callbacks=callbacks)
def generator(data, label, batch_size):
    """Yield (batch_images, batch_labels) forever, for Keras fit_generator.

    Parameters
    ----------
    data : sequence
        data[i] is one sample: a sequence of image file paths
        (presumably seq_num frames per sample — TODO confirm with caller).
    label : sequence
        One label entry per sample, sliced in lockstep with ``data``.
    batch_size : int
        Samples per yielded batch; the last batch of a pass may be smaller,
        since the batch count is ceil(len(data) / batch_size).

    Yields
    ------
    (np.ndarray, np.ndarray)
        Stacked frame arrays for the batch and the matching labels.
    """
    num_batches = np.ceil(len(data) / float(batch_size)).astype(int)
    while True:  # Keras generators must loop indefinitely
        for idx in range(num_batches):
            batch_x = data[idx * batch_size:(idx + 1) * batch_size]
            batch_y = label[idx * batch_size:(idx + 1) * batch_size]
            batch_seq = []
            for sample in batch_x:
                # Load every frame of this sample from disk.
                seq_img = [image.img_to_array(plt.imread(path))
                           for path in sample]
                batch_seq.append(seq_img)
            # FIX: the original also built ``seq_x = np.array([seq_img])``
            # inside the loop and never used it — dead allocation removed.
            yield np.array(batch_seq), np.array(batch_y)
我希望它可以工作。