Question

我从大小为4096的预训练vgg19中提取了CNN特征。然后，我使用一种较浅的体系结构来训练具有softmax和中心损失的分类器。不幸的是，softmax损失函数返回nan。 here有详细的讨论，但是由于labels和logits具有两种不同的数据格式（int64，float32），因此我无法解决剪辑问题。此外，我也改变了学习速度，但仍然遇到相同的错误。

请让我知道如何解决这种情况。

from __future__ import division
from __future__ import print_function

import csv

import numpy as np
import tensorflow as tf

from retrieval_model import setup_train_model

FLAGS = None
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def get_name(read_file):
    feat_lst = []
    identifier_lst = []
    with open(read_file, 'r') as csvfile:
        read_file = csv.reader(csvfile, delimiter=',')
        for row in read_file:
            feat = row[:-1]
            s_feat = [float(i) for i in feat]
            identifier = row[-1]
            feat_lst.append(s_feat)
            identifier_lst.append(identifier)
    return feat_lst, identifier_lst


def get_batch(batch_index, batch_size, labels, f_lst):
    start_ind = batch_index * batch_size
    end_ind = start_ind + batch_size

    return f_lst[start_ind:end_ind], labels[start_ind:end_ind]


def creat_dict(orig_labels):
    dict = {}
    count = 0
    for x in orig_labels:
        n_label = dict.get(x, None)
        if n_label is None:
            dict[x] = count
            count += 1
    return dict




def main(_):

    save_dir = 'model/one-branch-ckpt'
    train_file = 'gtrain.csv'
    img_feat, img_labels = get_name(train_file)

    map_dict   = creat_dict(img_labels)
    img_labels = [map_dict.get(x) for x in img_labels]


    im_feat_dim = 4096
    batch_size = 50
    max_num_epoch = 10
    steps_per_epoch = len(img_feat) // batch_size
    num_steps = steps_per_epoch * max_num_epoch

    # Setup placeholders for input variables.
    im_feat_plh = tf.placeholder(tf.float32, shape=[batch_size, im_feat_dim])
    label_plh   = tf.placeholder(tf.int64, shape=(batch_size), name='labels')

    train_phase_plh = tf.placeholder(tf.bool)

    # Setup training operation.
    t_l = setup_train_model(im_feat_plh, train_phase_plh, label_plh, classes)

    # Setup optimizer.
    global_step = tf.Variable(0, trainable=False)
    init_learning_rate = 0.0001
    learning_rate = tf.train.exponential_decay(init_learning_rate, global_step,
                                               steps_per_epoch, 0.794, staircase=True)
    optim = tf.train.AdamOptimizer(init_learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optim.minimize(t_l, global_step=global_step)

    # Setup model saver.
    saver = tf.train.Saver(save_relative_paths=True,max_to_keep=1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(num_steps):
            im_feats, labels = get_batch(
                i % steps_per_epoch, batch_size, img_labels, img_feat)
            feed_dict = {
                im_feat_plh: im_feats,
                label_plh: labels,
                train_phase_plh: True,
            }
            [_, loss_val] = sess.run([train_step, t_l], feed_dict=feed_dict)
            if i % 100 == 0:
                print('Epoch: %d Step: %d Loss: %f' % (i // steps_per_epoch, i, loss_val))
            if i % steps_per_epoch == 0 and i > 0:
                print('Saving checkpoint at step %d' % i)
                saver.save(sess, save_dir, global_step=global_step)


if __name__ == '__main__':
    np.random.seed(0)
    tf.set_random_seed(0)
    tf.app.run(main=main)

**************************** retrieval_model ******************** ************

def setup_train_model(im_feats, train_phase, im_labels, nrof_classes):
    alfa = 0.9
    # nrof_classes = 28783
    i_embed = embedding_model(im_feats, train_phase, im_labels)
    c_l   = embedding_loss(i_embed, im_labels, alfa, nrof_classes)
    loss = softmax_loss(i_embed, im_labels)
    total_loss = loss + c_l
    return total_loss

def add_fc(inputs, outdim, train_phase, scope_in):
    fc = fully_connected(inputs, outdim, activation_fn=None, scope=scope_in + '/fc')
    fc_bnorm = tf.layers.batch_normalization(fc, momentum=0.1, epsilon=1e-5,
                                             training=train_phase, name=scope_in + '/bnorm')
    fc_relu = tf.nn.relu(fc_bnorm, name=scope_in + '/relu')
    fc_out = tf.layers.dropout(fc_relu, seed=0, training=train_phase, name=scope_in + '/dropout')
    return fc_out

def embedding_loss(features, label, alfa, nrof_classes):
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    #centers = tf.scatter_sub(centers, label, diff)
    center_loss = tf.reduce_mean(tf.square(features - centers_batch))
    #softmax_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=features))
    #total_loss = softmax_loss + center_loss

    return  center_loss

  def embedding_model(im_feats, train_phase, im_labels,
                    fc_dim=2048, embed_dim=512):

    # Image branch.
    im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
    im_fc2 = fully_connected(im_fc1, embed_dim, activation_fn=None,
                             scope='im_embed_2')

    return tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)


def softmax_loss(feat, im_labels):
    label = tf.reshape(im_labels, [-1])
    softmax = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=feat))

    return softmax

softmax_cross_entropy_with_logits nan

0 个答案: