我创建了用于面部识别的暹罗神经网络。我使用Keras lib。 我的模型如下:
def create_base_model():
    """Build the convolutional backbone: Xception (no classification top)
    followed by global average pooling, yielding one feature vector per image."""
    backbone = Xception(
        include_top=False,
        weights=None,
        input_shape=image_sizes.IMAGE_SHAPE_SIZE_SN + (3,),
    )
    pooled = GlobalAveragePooling2D()(backbone.output)
    return Model(backbone.input, pooled)
def embedder(conv_feat_size):
    """Build the embedding head mapping pooled backbone features to a
    128-d L2-normalized embedding.

    Args:
        conv_feat_size: Dimensionality of the backbone's pooled feature vector.

    Returns:
        A Keras ``Model`` mapping ``(conv_feat_size,)`` inputs to 128-d
        unit-norm vectors.
    """
    # Renamed from ``input`` to avoid shadowing the Python builtin.
    feat_in = Input((conv_feat_size,), name='input')
    x = Dense(512)(feat_in)
    x = LeakyReLU(alpha=0.1)(x)
    x = Dense(128)(x)
    # L2-normalize so dot products between embeddings equal cosine
    # similarity, which the cosine branch of the triplet loss relies on.
    x = Lambda(lambda t: K.l2_normalize(t, axis=-1), name='normalize')(x)
    return Model(feat_in, x)
def get_siamese_model(base_model):
    """Assemble the triplet (anchor/positive/negative) siamese network.

    All three branches share the same backbone and embedding head; the
    three embeddings are concatenated along the last axis so a
    single-tensor triplet loss can slice them apart again.
    """
    image_shape = K.int_shape(base_model.input)[1:]
    feat_dim = K.int_shape(base_model.output)[-1]
    emb_model = embedder(feat_dim)

    anchor_in = Input(image_shape, name='anchor')
    positive_in = Input(image_shape, name='positive')
    negative_in = Input(image_shape, name='negative')

    # Weight sharing: the same two sub-models process every branch.
    branch_embeddings = [emb_model(base_model(t))
                         for t in (anchor_in, positive_in, negative_in)]
    merged_vector = Concatenate(axis=-1)(branch_embeddings)

    return Model(inputs=[anchor_in, positive_in, negative_in],
                 outputs=merged_vector)
我使用三元组损失函数（triplet loss）:
def triplet_loss(y_true, y_pred, cosine=True, alpha=0.2):
    """Triplet margin loss on a concatenated [anchor|positive|negative] tensor.

    Args:
        y_true: Unused (required by the Keras loss signature).
        y_pred: Tensor of shape (batch, 3 * embedding_size) holding the
            anchor, positive and negative embeddings side by side.
        cosine: If True use cosine distance (embeddings are assumed
            L2-normalized); otherwise use Euclidean distance.
        alpha: Margin enforced between positive and negative distances.

    Returns:
        Per-sample loss tensor of shape (batch,).
    """
    # Fixed: the backend alias is ``K`` everywhere else in this file;
    # lowercase ``k`` raises NameError at graph-build time.
    embedding_size = K.int_shape(y_pred)[-1] // 3
    ind = int(embedding_size * 2)
    a_pred = y_pred[:, :embedding_size]
    p_pred = y_pred[:, embedding_size:ind]
    n_pred = y_pred[:, ind:]
    if cosine:
        # With unit-norm embeddings the dot product is cosine similarity.
        positive_distance = 1 - K.sum((a_pred * p_pred), axis=-1)
        negative_distance = 1 - K.sum((a_pred * n_pred), axis=-1)
    else:
        # Clamp with epsilon before sqrt: d/dx sqrt(x) -> inf as x -> 0,
        # so a collapsed pair otherwise yields NaN/exploding gradients —
        # a common cause of all embeddings converging to one constant.
        positive_distance = K.sqrt(K.maximum(
            K.sum(K.square(a_pred - p_pred), axis=-1), K.epsilon()))
        negative_distance = K.sqrt(K.maximum(
            K.sum(K.square(a_pred - n_pred), axis=-1), K.epsilon()))
    return K.maximum(0.0, positive_distance - negative_distance + alpha)
然后我训练了模型:
# Compile with a small learning rate and the custom triplet loss.
# NOTE(review): newer Keras spells this ``learning_rate=``; ``lr=`` is the
# legacy keyword — confirm against the installed Keras version.
model.compile(Adam(lr=0.0001), loss=triplet_loss)
# Train from generators; ``fit_generator`` is deprecated in favor of
# ``fit`` in recent Keras releases.
model.fit_generator(train_gen,
                    steps_per_epoch=steps_per_epoch,
                    epochs=2,
                    validation_data=valid_gen,
                    validation_steps=validation_steps,
                    workers=12,
                    use_multiprocessing=True,
                    callbacks=callbacks)
我在训练时获得了很好的准确性。 但是,当我检查保存的模型时,对于不同的图片,我会得到相同的预测。看起来像这样:
# Rebuild a single-input embedding model from the saved triplet network
# (compile=False skips restoring the custom loss).
model = load_model(model_name, compile=False)
inp = model.layers[0].input
# NOTE(review): the hard-coded indices 3 and 4 assume the saved layer order
# is [3 inputs, base_model, embedder, concat] — verify with model.summary();
# looking layers up by name would be safer.
base_model = model.layers[3]
emb_model = model.layers[4]
conv_feat_model = Model(inp, emb_model(base_model(inp)))
preds = conv_feat_model.predict(img1)
preds2 = conv_feat_model.predict(img2)
preds和preds2相同:
[[ 0.00798154 -0.02381053 -0.02582906 -0.07199518 0.10519011 -0.00314439
-0.10797752 0.06806625 0.06223447 0.12241443 -0.07735252 0.02124843
0.15940368 0.06779307 0.07217986 -0.09875521 0.12994479 0.11027689
-0.06931251 0.07022905 -0.04785226 -0.04238459 -0.00985373 -0.07522172
0.1815699 -0.11751417 -0.02868035 -0.03449575 -0.16308287 -0.01191556
-0.12341576 0.03737723 -0.09038088 0.01470615 -0.00578096 -0.07435378
-0.00956436 -0.10893144 0.08642803 0.01112687 0.01189745 0.128754
0.07033242 -0.10699189 0.0508917 -0.02893313 -0.05784947 -0.1494101
-0.0883211 -0.18141606 0.0019109 0.1803653 0.08015647 0.0307116
-0.05125592 -0.03757589 0.06253863 -0.24168698 0.13919576 -0.06217849
-0.06851473 -0.15789092 0.07163556 -0.00736173 0.06365957 0.05686413
-0.10195459 0.11052794 -0.04966989 -0.06265325 0.08019789 0.04270525
-0.08460336 0.15021887 0.15742442 0.01014612 0.04533008 -0.0078541
0.10607596 0.01708411 0.02101402 -0.06603298 -0.02715356 -0.01722299
-0.01136316 0.14444259 -0.10876989 -0.11401331 0.1932368 0.08324403
-0.07035927 0.01442239 0.09930176 0.07264478 -0.15251659 0.06386208
-0.15979335 -0.02111898 0.02357377 0.05760377 -0.05667325 0.09260555
-0.10214964 0.01784979 -0.18854788 -0.12127047 0.00702229 0.06677626
0.03844206 -0.12365757 0.08759588 0.06660971 0.01137797 0.02459283
0.09108339 0.00450872 -0.09746534 0.156114 0.00185758 0.01722586
0.01481272 0.02754061 -0.04379161 -0.03591692 0.07744277 -0.03142267
0.00525929 0.0867951 ]]
我尝试用不同的轮数（epochs）来训练我的模型，也尝试了不同的图像预处理。但是没有进展。
我注意到我的数据集中的脸下有一些白色背景,也许会影响我的预测。 可以请人帮我解决这个问题吗?