I have two models that take their inputs from the same training dataset. I am trying to train them alternately, each with its own optimizer and learning rate. To do this, while training one model I have to freeze the weights of the other, and vice versa. However, the approach I am using takes far too long to train and even runs into OOM errors, whereas simply training the two models together causes no such problems.

A code snippet and an image of the example models are attached below; the actual models have many more layers and high-dimensional inputs.
import keras
from keras.layers import Input
from keras.models import Model

def convA(x):
    conv1 = keras.layers.Conv2D(64, (3, 3), strides=(1, 1), padding='valid', activation='relu', name='conv21')(x)
    conv2 = keras.layers.Conv2D(16, (3, 3), strides=(1, 1), padding='valid', activation='relu', name='conv22')(conv1)
    return conv2

def convB(x):
    conv1 = keras.layers.Conv2D(64, (3, 3), strides=(1, 1), padding='valid', activation='relu', name='conv2a')(x)
    conv2 = keras.layers.Conv2D(16, (3, 3), strides=(1, 1), padding='valid', activation='relu', name='conv2b')(conv1)
    return conv2
x = Input(shape=(11, 11, 32), name='input1')
convP = convA(x)
convQ = convB(x)
model1 = Model(x, convP)
model2 = Model(x, convQ)

multiply_layer = keras.layers.Multiply()([model1(x), model2(x)])
conv1_reshape = keras.layers.Reshape([7*7*16], name='fc_reshape')(multiply_layer)
fc = keras.layers.Dense(15, activation='softmax', name='fc1')(conv1_reshape)
model_main = Model(x, fc)
optim1 = keras.optimizers.SGD(0.0009, momentum=0.01, nesterov=True)
optim2 = keras.optimizers.SGD(0.00009, momentum=0.01, nesterov=True)

for epoch in range(250):
    for batch in range(100):
        x_batch = x_train[batch*16:(batch+1)*16, :, :, :]
        y_batch = y_train[batch*16:(batch+1)*16, :]

        # Freeze model1, update model2 with the larger learning rate
        model1.trainable = False
        model2.trainable = True
        model_main.compile(loss='categorical_crossentropy', optimizer=optim1, metrics=['accuracy'])
        model_main.train_on_batch(x_batch, y_batch)

        # Freeze model2, update model1 with the smaller learning rate
        model1.trainable = True
        model2.trainable = False
        model_main.compile(loss='categorical_crossentropy', optimizer=optim2, metrics=['accuracy'])
        model_main.train_on_batch(x_batch, y_batch)
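From what I can tell, the repeated compile() calls inside the batch loop are the most likely cause of the slowdown and memory growth, so I am wondering whether something like the following would be a valid way to express the same alternating scheme. This is only an untested sketch of what I have in mind (model_train1 / model_train2 are names I made up): build two top-level models over the same shared layers and compile each of them once, outside the loop, with the appropriate sub-model frozen. I am assuming the trainable state of the frozen sub-model is captured at compile time, the way it is done in GAN-style training.

# Sketch (untested): compile only twice, outside the loop.
model1.trainable = False
model2.trainable = True
model_train2 = Model(x, fc)   # should update only convB's weights
model_train2.compile(loss='categorical_crossentropy', optimizer=optim1, metrics=['accuracy'])

model1.trainable = True
model2.trainable = False
model_train1 = Model(x, fc)   # should update only convA's weights
model_train1.compile(loss='categorical_crossentropy', optimizer=optim2, metrics=['accuracy'])

for epoch in range(250):
    for batch in range(100):
        x_batch = x_train[batch*16:(batch+1)*16, :, :, :]
        y_batch = y_train[batch*16:(batch+1)*16, :]
        model_train2.train_on_batch(x_batch, y_batch)
        model_train1.train_on_batch(x_batch, y_batch)

Is this the right way to avoid recompiling on every batch, or is there a better way to alternate two optimizers over shared layers?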