Understanding a triplet loss data generator

Asked: 2019-08-14 11:39:27

Tags: keras deep-learning

Below I quote the code (derived from https://github.com/noelcodella/tripletloss-keras-tensorflow/blob/master/tripletloss.py). It works, but I had to plug my own image data generator into it. Now, to make it work, I had to add a dummy (at least I think it is a dummy) argument (the last "base"):

yield [base, positive, negative], base

As I mentioned, it seems to be working, but: a) I am not sure it really is a dummy. It would be really bad if it were feeding in look-ahead data or something. b) Neither here nor in the original code (available via the link above) can I understand what it is for. I mean, without it the Lambda layer does not work, complaining "you have to provide data", but...

Please help.
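For context, here is a minimal sketch of the contract fit_generator() imposes on a generator: it must yield (inputs, targets) tuples. The constants and the all-zeros dummy target below are my assumptions for illustration, not part of the code that follows:

import numpy as np

BATCH_SIZE = 16    # assumed for this sketch
IMAGE_SIZE = 224   # assumed input resolution

# Fake arrays standing in for the loaded image batches.
base = np.random.rand(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3)
positive = np.random.rand(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3)
negative = np.random.rand(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3)

# With three named inputs, 'inputs' is a list (or a dict keyed by the
# input layer names). The target is only passed through to the loss, so
# an all-zeros array shaped like the model output (batch, 3, 1) works:
dummy_target = np.zeros((BATCH_SIZE, 3, 1))
batch = ([base, positive, negative], dummy_target)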

# https://github.com/noelcodella/tripletloss-keras-tensorflow/blob/master/tripletloss.py
# (get_triplet, loadImage, deleteSavedNet, plotHistoryLoss and constants such as
# BATCH_SIZE, IMAGE_SIZE, EMBEDDING_DIM, EPOCHS are defined elsewhere)

import numpy as np
import keras
from keras import backend as K
from keras.layers import Input, Dense, GlobalAveragePooling2D, Lambda
from keras.models import Model
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint

def gen(bIsTrain):

  while True:
    arrBaseExamples = []
    arrPositiveExamples = []
    arrNegativeExamples = []

    for i in range(BATCH_SIZE):
      nImageIdx, positiveClass, negativeClass = get_triplet(TRAINING_IMAGES_PER_CLASS, bIsTrain)

      # Anchor and positive come from the same class; negative from another.
      baseExampleImg = loadImage(positiveClass, nImageIdx[0], datagen)
      positiveExampleImg = loadImage(positiveClass, nImageIdx[1], datagen)
      negativeExampleImg = loadImage(negativeClass, nImageIdx[2], datagen)

      arrBaseExamples.append(baseExampleImg)
      arrPositiveExamples.append(positiveExampleImg)
      arrNegativeExamples.append(negativeExampleImg)

    # Scale images to [0, 1] (preprocess_input was tried here as well).
    base = np.array(arrBaseExamples) / 255.
    positive = np.array(arrPositiveExamples) / 255.
    negative = np.array(arrNegativeExamples) / 255.

    # fit_generator() expects (inputs, targets) pairs; the trailing 'base'
    # is the "dummy" target the question is about.
    yield [base, positive, negative], base



def createModel(nL2):
    input_shape=(IMAGE_SIZE,IMAGE_SIZE,3)

    # Initialize a ResNet50_ImageNet Model
    resnet_input = Input(shape=input_shape)
    resnet_model = keras.applications.resnet50.ResNet50(weights='imagenet', include_top = False, input_tensor=resnet_input)

    # New Layers over ResNet50
    net = resnet_model.output
    net = GlobalAveragePooling2D(name='gap')(net)
    net = Dense(EMBEDDING_DIM, activation='relu', name='t_emb_1')(net)
    net = Lambda(lambda x: K.l2_normalize(x, axis=1), name="lambda")(net)

    # model creation
    base_model = Model(resnet_model.input, net, name="base_model")

    # triplet framework, shared weights

    input_anchor = Input(shape=input_shape, name='input_anchor')
    input_positive = Input(shape=input_shape, name='input_pos')
    input_negative = Input(shape=input_shape, name='input_neg')

    net_anchor = base_model(input_anchor)
    net_positive = base_model(input_positive)
    net_negative = base_model(input_negative)

    # A Lambda layer produces output using the given function; here it is the Euclidean distance.
    positive_dist = Lambda(euclidean_distance, name='pos_dist')([net_anchor, net_positive])
    negative_dist = Lambda(euclidean_distance, name='neg_dist')([net_anchor, net_negative])
    tertiary_dist = Lambda(euclidean_distance, name='ter_dist')([net_positive, net_negative])

    # This Lambda layer stacks the outputs so that all three distances are
    # available to the objective; output shape: (batch, 3, 1).
    stacked_dists = Lambda(lambda vects: K.stack(vects, axis=1), name='stacked_dists')([positive_dist, negative_dist, tertiary_dist])

    triplet_model = Model([input_anchor, input_positive, input_negative], stacked_dists, name='triple_siamese')

    base_lr = 0.0001
    momentum = 0.9
    v_optimizer = SGD(lr=base_lr, momentum=momentum, decay=0.0, nesterov=False)

    triplet_model.compile(optimizer=v_optimizer, loss=triplet_loss, metrics=[accuracy])

    return base_model, triplet_model  # was: embedding_model (undefined)


# y_pred[:, i, 0] holds the stacked distances (0: anchor-positive,
# 1: anchor-negative, 2: positive-negative); y_true is ignored.
def triplet_loss(y_true, y_pred):
    margin = K.constant(1)
    return K.mean(K.maximum(K.constant(0),
        K.square(y_pred[:, 0, 0])
        - 0.5 * (K.square(y_pred[:, 1, 0]) + K.square(y_pred[:, 2, 0]))
        + margin))

# Fraction of triplets where anchor-positive < anchor-negative distance.
def accuracy(y_true, y_pred):
    return K.mean(y_pred[:, 0, 0] < y_pred[:, 1, 0])

def l2Norm(x):
    return  K.l2_normalize(x, axis=-1)

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))           


checkpoint = ModelCheckpoint(best_weights_filepath, monitor="val_loss",
    save_best_only=True, save_weights_only=True, mode='auto')
callbacks_list = [checkpoint]

gen_train = gen(True)
gen_valid = gen(False)

for i in range(0, len(arrParams)):
  nL2 = arrParams[i][0]
  EMBEDDING_DIM = arrParams[i][1]

  deleteSavedNet(best_weights_filepath)

  embedding_model, triplet_model = createModel(nL2)

  nNumOfClasses = len(arrLabels)
  nNumOfTrainSamples = TRAINING_IMAGES_PER_CLASS * nNumOfClasses
  nNumOfValidSamples = VALIDATION_IMAGES_PER_CLASS * nNumOfClasses
  STEP_SIZE_TRAIN = max(1, nNumOfTrainSamples // BATCH_SIZE)
  STEP_SIZE_VALID = max(1, nNumOfValidSamples // BATCH_SIZE)

  # (Many alternative optimizers were also tried here: Adam, RMSprop,
  # Adagrad, Adadelta, Adamax, Nadam, with various learning rates.)

  print("Available metrics: ", triplet_model.metrics_names)

  history = triplet_model.fit_generator(gen_train, validation_data=gen_valid,
    epochs=EPOCHS, verbose=1, steps_per_epoch=STEP_SIZE_TRAIN,
    validation_steps=STEP_SIZE_VALID, callbacks=callbacks_list)

  print(nL2, EMBEDDING_DIM)
  plotHistoryLoss()  

1 Answer:

Answer 0 (score: 1):

Well, it may be too late, but a single triplet does not really have a class for classification (one could say that the types of triplets, such as semi-hard and hard, are class-like, but they actually describe the loss value), while fit_generator() in Keras requires the generator to yield a pair of (inputs, targets), so a dummy value can be passed as the targets.
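To make the "dummy" explicit: the triplet_loss above never reads y_true, which can be checked numerically. Here is a small NumPy stand-in for the backend math, with made-up distances (not from the original post):

import numpy as np

margin = 1.0

# Pretend model output: distances stacked as (batch, 3, 1), i.e.
# [d(anchor,pos), d(anchor,neg), d(pos,neg)] per sample.
y_pred = np.array([[[0.2], [0.9], [0.8]],
                   [[0.7], [0.3], [0.4]]])

# Same formula as triplet_loss, written with NumPy; y_true never appears,
# so any dummy target of a compatible shape will do.
loss = np.mean(np.maximum(
    0.0,
    np.square(y_pred[:, 0, 0])
    - 0.5 * (np.square(y_pred[:, 1, 0]) + np.square(y_pred[:, 2, 0]))
    + margin))

print(loss)  # 0.84 for these made-up numbers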