I am training a neural network model with Keras on CPU. It runs fine when I don't add any MaxPool layers, but whenever I use a MaxPool layer, my Jupyter kernel dies.
My whole dataset is about 480 MB. My system has 128 GB of RAM, and a run without the MaxPool layers never uses more than 5-10 GB of it. My batch size is also small (32), and memory usage does not increase noticeably while training runs.
CPU usage rises for a few seconds and then drops. The only output for the epoch is the line printing its start:
Epoch 1/2
Then my kernel dies.
Here is the network architecture:
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         (None, 382)               0
_________________________________________________________________
embedding_1 (Embedding)      (None, 382, 100)          490600
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 382, 64)           12864
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 191, 64)           0
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 191, 128)          16512
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 95, 128)           0
_________________________________________________________________
flatten_1 (Flatten)          (None, 12160)             0
_________________________________________________________________
dense_1 (Dense)              (None, 64)                778304
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 129
=================================================================
Total params: 1,306,729
Trainable params: 1,306,729
Non-trainable params: 0
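For reference, a minimal sketch that builds the same embedding/conv/pool stack on random placeholder data of matching shapes (not my real pipeline, just the shapes from the summary above) would look like this:

import numpy as np
from keras.models import Model
from keras.layers import Input, Embedding, Conv1D, MaxPooling1D, Flatten, Dense

# Random integer "token" data with the same shapes as in the summary above
x = np.random.randint(0, 4906, size=(1000, 382))
y = np.random.randint(0, 2, size=(1000, 1))

inp = Input(shape=(382,))
net = Embedding(4906, 100)(inp)
net = Conv1D(64, 2, padding='same', activation='relu')(net)
net = MaxPooling1D(pool_size=2)(net)   # the layer that seems to kill the kernel
net = Conv1D(128, 2, padding='same', activation='relu')(net)
net = MaxPooling1D(pool_size=2)(net)
net = Flatten()(net)
out = Dense(1, activation='sigmoid')(net)

model = Model(inputs=inp, outputs=out)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x, y, batch_size=32, epochs=2)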
Here is how I train the neural network:
from keras.models import Model
from keras.layers import Input, Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau

def get_nnn(train_data_shape, max_val, emb_shape, drop_out, pre_batch_size, pre_batch_epoch,
            final_batch_size, final_batch_epoch, ls_start, lr_end, class_weight, lr_decay_pat,
            lr_decay_rat, num_conv, conv_kernels, conv_neurons, train_data, train_lbl,
            test_data, test_lbl, loss_type='binary_crossentropy',
            act='sigmoid', use_emb=True, use_max_pool=True, custom_methods=[]):
    # A lot of params
    nnet = Input(shape=(train_data_shape,))
    inp = nnet
    if use_emb:
        nnet = Embedding(max_val + 1, emb_shape)(nnet)
    for i in range(num_conv):
        nnet = Conv1D(conv_neurons[i], conv_kernels[i], padding='same', activation='relu')(nnet)
        if use_max_pool:
            # Adding MaxPool here
            nnet = MaxPooling1D(pool_size=2)(nnet)
    nnet = Flatten()(nnet)
    nnet = Dense(64, activation='relu')(nnet)
    nnet = Dropout(drop_out)(nnet)
    nnet = Dense(128, activation='relu')(nnet)

    batch_size = final_batch_size
    epochs = final_batch_epoch
    lentrain = train_data.shape[0]
    # Per-update decay so the learning rate shrinks from lr_init toward lr_fin over the run
    exp_decay = lambda init, fin, steps: (init / fin) ** (1 / (steps - 1)) - 1
    steps = int(lentrain / batch_size) * epochs
    lr_init, lr_fin = ls_start, lr_end
    lr_decay = exp_decay(lr_init, lr_fin, steps)

    output = Dense(1, activation=act)(nnet)
    model = Model(inputs=inp, outputs=output)
    # precisioned, recalled and f1 are custom metric functions defined elsewhere in my notebook
    model.compile(loss=loss_type, optimizer=Adam(lr=ls_start, decay=lr_decay),
                  metrics=['accuracy', precisioned, recalled, f1] + custom_methods)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=lr_decay_rat,
                                  patience=lr_decay_pat, min_lr=lr_end)
    print(model.summary())

    # Short warm-up fit, followed by the main training run
    model.fit(train_data, train_lbl,
              epochs=pre_batch_epoch,
              batch_size=pre_batch_size,
              class_weight=class_weight,
              validation_data=(test_data, test_lbl),
              callbacks=[reduce_lr])
    model.fit(train_data, train_lbl,
              epochs=epochs,
              batch_size=batch_size,
              class_weight=class_weight,
              validation_data=(test_data, test_lbl),
              callbacks=[reduce_lr])
    return model
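For completeness, this is roughly how I call it. The shape-related arguments match the summary above, while the dropout, learning-rate, class-weight and final-epoch values below are just placeholders, not my exact hyperparameters:

# Placeholder hyperparameter values for illustration; only the shapes are the real ones
model = get_nnn(train_data_shape=382, max_val=4905, emb_shape=100, drop_out=0.5,
                pre_batch_size=32, pre_batch_epoch=2,
                final_batch_size=32, final_batch_epoch=10,
                ls_start=1e-3, lr_end=1e-5,
                class_weight={0: 1.0, 1: 1.0},
                lr_decay_pat=3, lr_decay_rat=0.5,
                num_conv=2, conv_kernels=[2, 2], conv_neurons=[64, 128],
                train_data=train_data, train_lbl=train_lbl,
                test_data=test_data, test_lbl=test_lbl)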