使用SELU激活函数训练CNN模型

时间:2018-09-05 06:37:17

标签: python tensorflow deep-learning conv-neural-network

我正在使用TensorFlow训练自己的模型。但是,当我把激活函数从ReLU改为SELU时遇到了问题。

情况如下:学习曲线突然下跌,我不清楚是什么原因造成的。

我的学习曲线

enter image description here

像这样

据我所知,SELU可以防止过拟合,因此我尝试在模型中使用它。使用SELU时有什么需要注意的事项或前提条件吗?

这是我的代码:

这是我更改激活函数的地方:

-----

def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name):
    """Apply a SELU-activated 2-D convolution followed by max pooling.

    Args:
        x_tensor: Input tensor handed to ``tf.layers.conv2d``.
        conv_num_outputs: Number of convolution filters.
        conv_ksize: Convolution kernel size.
        conv_strides: Convolution strides.
        pool_ksize: Max-pooling window size.
        pool_strides: Max-pooling strides.
        layer_name: Name assigned to the convolution layer.

    Returns:
        The output tensor of the max-pooling layer.
    """
    conv_layer = tf.layers.conv2d(
        x_tensor,
        conv_num_outputs,
        kernel_size=conv_ksize,
        strides=conv_strides,
        activation=tf.nn.selu,
        # SELU's self-normalizing behaviour assumes zero-mean weights with
        # variance 1 / fan_in (LeCun initialization); the layer's default
        # Glorot initializer does not satisfy that, so set it explicitly.
        kernel_initializer=tf.variance_scaling_initializer(scale=1.0, mode="fan_in"),
        name=layer_name,
    )
    return tf.layers.max_pooling2d(conv_layer, pool_size=pool_ksize, strides=pool_strides)

-----

# Build the classification graph: four conv + max-pool blocks, one hidden
# dense layer, and a linear logits layer trained with softmax cross-entropy.
tf.reset_default_graph()

# ---- placeholders ----
input_img = tf.placeholder(dtype=tf.float32, shape=(None, img_size, img_size, 3))
y_true = tf.placeholder(dtype=tf.float32, shape=(None, num_class))
# Fed by the training loop, but no layer in this graph consumes it while
# dropout is disabled.
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
# Learning rate is fed per step so it can be changed between epochs.
lr_in = tf.placeholder(dtype=tf.float32, name='learning_rate')

# ---- hyper-parameters ----
conv_ksize = (3, 3)
conv_strides = (1, 1)
pool_ksize = (2, 2)
pool_strides = (2, 2)
n_filters_1 = 32
n_filters_2 = 64
n_filters_3 = 128
n_filters_4 = 256
onebyone_ksize = (1, 1)  # not used by any layer below

# ---- CNN ----
# Batch normalization and dropout after the conv blocks were experimented
# with previously and are currently disabled.
conv_1 = conv2d_maxpool(input_img, n_filters_1, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name="conv1")
conv_2 = conv2d_maxpool(conv_1, n_filters_2, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name="conv2")
conv_3 = conv2d_maxpool(conv_2, n_filters_3, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name="conv3")
conv_4 = conv2d_maxpool(conv_3, n_filters_4, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name="conv4")

flatten = tf.layers.flatten(conv_4)

# NOTE(review): this dense layer uses ReLU while the conv blocks use a
# different activation -- confirm that the mix is intended.
fc1 = tf.layers.dense(flatten, 256, activation=tf.nn.relu)

# The logits width must match the label width of `y_true`, so it is driven
# by `num_class` instead of a hard-coded 6.
out = tf.layers.dense(fc1, num_class, activation=None, name="logits")

predict = tf.nn.softmax(out)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y_true))
# Use the `lr_in` placeholder rather than the Python float `lr`: a float is
# baked into the graph as a constant at build time, so later changes to `lr`
# (and the value fed for `lr_in`) would otherwise never reach the optimizer.
optimizer = tf.train.AdamOptimizer(lr_in).minimize(cost)

# ---- accuracy ----
correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(y_true, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

训练

# Train the model with early stopping and a patience-based learning-rate
# decay, keeping a checkpoint of the best validation loss seen so far.

# Per-epoch history of training and validation metrics.
train_loss, train_acc = [], []
valid_loss, valid_acc = [], []
# At least one update per epoch, so the per-epoch averages below never
# divide by zero when the training set is smaller than one batch.
update_per_epoch = max(1, X_train.shape[0] // batch_size)

# Early stopping and learning-rate decay configuration.
es_patience = 10
es_n = 0

lr_patience = 3
lr_n = 0

# Best validation loss so far. Tracking it explicitly (instead of comparing
# against np.min(valid_loss)) means a NaN loss is never mistaken for an
# improvement: every comparison with NaN is False.
best_valid_loss = np.inf

save_model_path = './save'

# Create the session and initialize all graph variables.
saver = tf.train.Saver()
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Batch generators. The training generator uses the same `batch_size` that
# `update_per_epoch` was derived from, so the two stay consistent.
batch_gen = img_gen.flow(generator_input(X_train), y_train, batch_size=batch_size)
# NOTE(review): validation data goes through the same `img_gen` as training;
# if `img_gen` applies random augmentation, validation metrics are computed
# on augmented images -- confirm this is intended.
val_batch_gen = img_gen.flow(generator_input(X_valid), y_valid, batch_size=len(X_valid))

for i in range(epoch):

    epoch_loss = 0
    epoch_acc = 0

    for _ in range(update_per_epoch):
        image, label = next(batch_gen)

        _, this_loss, this_acc = sess.run([optimizer, cost, accuracy], feed_dict={
            input_img: image,
            y_true: label,
            lr_in: lr,
            keep_prob: keep_probability
        })

        epoch_loss += this_loss
        epoch_acc += this_acc

    # End of epoch: average the per-batch metrics.
    epoch_loss /= update_per_epoch
    epoch_acc /= update_per_epoch

    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)

    print('Epoch {:>2}   Loss: {:>4.4f} Training Accuracy: {:.6f}'.format(i + 1, epoch_loss, epoch_acc))

    # Evaluate on the whole validation set (a single batch of len(X_valid)).
    valid_image, valid_label = next(val_batch_gen)

    valid_this_loss, valid_this_acc = sess.run([cost, accuracy], feed_dict={
        input_img: valid_image,
        y_true: valid_label,
        lr_in: lr,
        keep_prob: 1.
    })

    valid_loss.append(valid_this_loss)
    valid_acc.append(valid_this_acc)

    print('Epoch {:>2}   Loss: {:>4.4f} Validation Accuracy: {:.6f}'.format(i + 1, valid_this_loss, valid_this_acc))

    # Update the patience counters; ties with the best loss count as an
    # improvement, a NaN loss counts as no improvement.
    if valid_this_loss <= best_valid_loss:
        best_valid_loss = valid_this_loss
        es_n = 0
        lr_n = 0
        saver.save(sess, os.path.join(os.getcwd(), 'bestsession.ckpt'))
    else:
        es_n += 1
        lr_n += 1

    # Early stop.
    if es_n >= es_patience:
        print("-----------early stopping-------------")
        break

    # Adaptive learning rate.
    # NOTE: the decayed `lr` reaches the optimizer only through the `lr_in`
    # feed; it has no effect if the optimizer was built from a constant.
    if lr_n >= lr_patience:
        lr *= lr_decay_rate
        lr_n = 0
        print("-----------adjust learning rate------------")

# Save the final model state (the best-validation state is the separate
# 'bestsession.ckpt' checkpoint written inside the loop).
save_path = saver.save(sess, save_model_path)
print('-----model save ------')

----------- 18/09/07 ------------

我总是可以重现相同的结果。

这是我的代码,是用Jupyter编写的。但是抱歉,我无法上传训练数据:

https://drive.google.com/open?id=1uUE32KrNmWnhLbV8z-fyHSMu6zGCCG_e

0 个答案:

没有答案