我创建了用于面部识别的暹罗神经网络。我使用Keras lib。 我的模型如下:
def create_base_model():
    """Build the convolutional backbone: Xception (no classification top)
    followed by global average pooling, yielding one feature vector per image."""
    backbone = Xception(
        include_top=False,
        weights=None,
        input_shape=image_sizes.IMAGE_SHAPE_SIZE_SN + (3,),
    )
    pooled = GlobalAveragePooling2D()(backbone.output)
    return Model(backbone.input, pooled)
def embedder(conv_feat_size):
    """Build the embedding head mapping pooled backbone features to a
    128-d L2-normalized embedding.

    Args:
        conv_feat_size: Dimensionality of the backbone's pooled feature vector.

    Returns:
        A Keras ``Model`` mapping ``(conv_feat_size,)`` inputs to 128-d
        unit-norm vectors.
    """
    # Renamed from ``input`` to avoid shadowing the Python builtin.
    feat_in = Input((conv_feat_size,), name='input')
    x = Dense(512)(feat_in)
    x = LeakyReLU(alpha=0.1)(x)
    x = Dense(128)(x)
    # L2-normalize so dot products between embeddings equal cosine
    # similarity, which the cosine branch of the triplet loss relies on.
    x = Lambda(lambda t: K.l2_normalize(t, axis=-1), name='normalize')(x)
    return Model(feat_in, x)
def get_siamese_model(base_model):
    """Assemble the triplet (anchor/positive/negative) siamese network.

    All three branches share the same backbone and embedding head; the
    three embeddings are concatenated along the last axis so a
    single-tensor triplet loss can slice them apart again.
    """
    image_shape = K.int_shape(base_model.input)[1:]
    feat_dim = K.int_shape(base_model.output)[-1]
    emb_model = embedder(feat_dim)

    anchor_in = Input(image_shape, name='anchor')
    positive_in = Input(image_shape, name='positive')
    negative_in = Input(image_shape, name='negative')

    # Weight sharing: the same two sub-models process every branch.
    branch_embeddings = [emb_model(base_model(t))
                         for t in (anchor_in, positive_in, negative_in)]
    merged_vector = Concatenate(axis=-1)(branch_embeddings)

    return Model(inputs=[anchor_in, positive_in, negative_in],
                 outputs=merged_vector)
我使用三元组损失函数（triplet loss）:
def triplet_loss(y_true, y_pred, cosine=True, alpha=0.2):
    """Triplet margin loss on a concatenated [anchor|positive|negative] tensor.

    Args:
        y_true: Unused (required by the Keras loss signature).
        y_pred: Tensor of shape (batch, 3 * embedding_size) holding the
            anchor, positive and negative embeddings side by side.
        cosine: If True use cosine distance (embeddings are assumed
            L2-normalized); otherwise use Euclidean distance.
        alpha: Margin enforced between positive and negative distances.

    Returns:
        Per-sample loss tensor of shape (batch,).
    """
    # Fixed: the backend alias is ``K`` everywhere else in this file;
    # lowercase ``k`` raises NameError at graph-build time.
    embedding_size = K.int_shape(y_pred)[-1] // 3
    ind = int(embedding_size * 2)
    a_pred = y_pred[:, :embedding_size]
    p_pred = y_pred[:, embedding_size:ind]
    n_pred = y_pred[:, ind:]
    if cosine:
        # With unit-norm embeddings the dot product is cosine similarity.
        positive_distance = 1 - K.sum((a_pred * p_pred), axis=-1)
        negative_distance = 1 - K.sum((a_pred * n_pred), axis=-1)
    else:
        # Clamp with epsilon before sqrt: d/dx sqrt(x) -> inf as x -> 0,
        # so a collapsed pair otherwise yields NaN/exploding gradients —
        # a common cause of all embeddings converging to one constant.
        positive_distance = K.sqrt(K.maximum(
            K.sum(K.square(a_pred - p_pred), axis=-1), K.epsilon()))
        negative_distance = K.sqrt(K.maximum(
            K.sum(K.square(a_pred - n_pred), axis=-1), K.epsilon()))
    return K.maximum(0.0, positive_distance - negative_distance + alpha)
然后我训练了模型:
# Compile with a small learning rate and the custom triplet loss.
# NOTE(review): newer Keras spells this ``learning_rate=``; ``lr=`` is the
# legacy keyword — confirm against the installed Keras version.
model.compile(Adam(lr=0.0001), loss=triplet_loss)
# Train from generators; ``fit_generator`` is deprecated in favor of
# ``fit`` in recent Keras releases.
model.fit_generator(train_gen,
                    steps_per_epoch=steps_per_epoch,
                    epochs=2,
                    validation_data=valid_gen,
                    validation_steps=validation_steps,
                    workers=12,
                    use_multiprocessing=True,
                    callbacks=callbacks)
我在训练时获得了很好的准确性。 但是,当我检查保存的模型时,对于不同的图片,我会得到相同的预测。看起来像这样:
# Rebuild a single-input embedding model from the saved triplet network
# (compile=False skips restoring the custom loss).
model = load_model(model_name, compile=False)
inp = model.layers[0].input
# NOTE(review): the hard-coded indices 3 and 4 assume the saved layer order
# is [3 inputs, base_model, embedder, concat] — verify with model.summary();
# looking layers up by name would be safer.
base_model = model.layers[3]
emb_model = model.layers[4]
conv_feat_model = Model(inp, emb_model(base_model(inp)))
preds = conv_feat_model.predict(img1)
preds2 = conv_feat_model.predict(img2)
preds和preds2相同:
[[ 0.00798154 -0.02381053 -0.02582906 -0.07199518 0.10519011 -0.00314439
-0.10797752 0.06806625 0.06223447 0.12241443 -0.07735252 0.02124843
0.15940368 0.06779307 0.07217986 -0.09875521 0.12994479 0.11027689
-0.06931251 0.07022905 -0.04785226 -0.04238459 -0.00985373 -0.07522172
0.1815699 -0.11751417 -0.02868035 -0.03449575 -0.16308287 -0.01191556
-0.12341576 0.03737723 -0.09038088 0.01470615 -0.00578096 -0.07435378
-0.00956436 -0.10893144 0.08642803 0.01112687 0.01189745 0.128754
0.07033242 -0.10699189 0.0508917 -0.02893313 -0.05784947 -0.1494101
-0.0883211 -0.18141606 0.0019109 0.1803653 0.08015647 0.0307116
-0.05125592 -0.03757589 0.06253863 -0.24168698 0.13919576 -0.06217849
-0.06851473 -0.15789092 0.07163556 -0.00736173 0.06365957 0.05686413
-0.10195459 0.11052794 -0.04966989 -0.06265325 0.08019789 0.04270525
-0.08460336 0.15021887 0.15742442 0.01014612 0.04533008 -0.0078541
0.10607596 0.01708411 0.02101402 -0.06603298 -0.02715356 -0.01722299
-0.01136316 0.14444259 -0.10876989 -0.11401331 0.1932368 0.08324403
-0.07035927 0.01442239 0.09930176 0.07264478 -0.15251659 0.06386208
-0.15979335 -0.02111898 0.02357377 0.05760377 -0.05667325 0.09260555
-0.10214964 0.01784979 -0.18854788 -0.12127047 0.00702229 0.06677626
0.03844206 -0.12365757 0.08759588 0.06660971 0.01137797 0.02459283
0.09108339 0.00450872 -0.09746534 0.156114 0.00185758 0.01722586
0.01481272 0.02754061 -0.04379161 -0.03591692 0.07744277 -0.03142267
0.00525929 0.0867951 ]]
我尝试用不同的轮数（epochs）来训练我的模型，也尝试了不同的图像预处理。但是没有进展。
我注意到我的数据集中的脸下有一些白色背景,也许会影响我的预测。 可以请人帮我解决这个问题吗?