Exploding gradients in the fully connected layers

Date: 2018-08-26 07:16:24

Tags: python tensorflow neural-network deep-learning mnist

I am trying to train a deep-learning model on the MNIST dataset. My model's layout is:

Input (28 * 28 * 1)

Conv2d + 2x2 max-pool -> (14 * 14 * 32)

Conv2d + 2x2 max-pool -> (7 * 7 * 64), then flattened

FC (3136 * 1024)

FC (1024 * 10)

10-class prediction for MNIST
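As a quick sanity check on those sizes (a hypothetical snippet, not part of the original post): each 2x2, stride-2 max-pool halves the spatial size, so 28 -> 14 -> 7, and the first FC layer's fan-in is 7 * 7 * 64 = 3136.

size = 28
for _ in range(2):  # two 2x2, stride-2 max-pool layers
    size //= 2
print(size, size * size * 64)  # 7 3136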

%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

# Convert integer class labels to one-hot vectors.
def one_hot(y):
    y1 = np.zeros((y.shape[0], 10))
    for i in range(y.shape[0]):
        y1[i][y[i]] = 1
    return y1

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train, y_test = one_hot(y_train), one_hot(y_test)
x_train = np.reshape(x_train, [x_train.shape[0], 28, 28, 1])
x_test = np.reshape(x_test, [x_test.shape[0], 28, 28, 1])

x_dataset = tf.data.Dataset.from_tensor_slices(x_train)
y_dataset = tf.data.Dataset.from_tensor_slices(y_train)
train_dataset = tf.data.Dataset.zip((x_dataset, y_dataset)).repeat().batch(50)
# A reinitializable iterator: the same iterator feeds the graph and is
# switched between the training and validation datasets via its init ops.
iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
next_element = iterator.get_next()
training_init_op = iterator.make_initializer(train_dataset)

x_testds = tf.data.Dataset.from_tensor_slices(x_test)
y_testds = tf.data.Dataset.from_tensor_slices(y_test)
testds = tf.data.Dataset.zip((x_testds, y_testds)).repeat().batch(2000)
valid_init_op = iterator.make_initializer(testds)
##%%##

def conv_layer(input, channels_in, channels_out, name="conv"):
    with tf.name_scope(name):
        input = tf.cast(input, tf.float32)
        # 5x5 kernels; weights and biases drawn from a truncated normal.
        w = tf.Variable(tf.truncated_normal([5, 5, channels_in, channels_out], stddev=0.1), name="W")
        b = tf.Variable(tf.truncated_normal([channels_out], stddev=0.1), name="B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activation", act)
        return act

def fc_layer(input, channels_in, channels_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([channels_in, channels_out], stddev=0.1), name="W")
        b = tf.Variable(tf.zeros([channels_out]), name="B")
        # ReLU is applied to every FC layer, including the final logits layer.
        act = tf.nn.relu(tf.matmul(input, w) + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activation", act)
        return act

conv1 = conv_layer(next_element[0], 1, 32, "conv1")
pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool1")

conv2 = conv_layer(pool1, 32, 64, "conv2")
pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool2")
# Two 2x2 max-pools reduce 28x28 to 7x7, so the flattened size is 7*7*64 = 3136.
flattened = tf.reshape(pool2, [-1, 7 * 7 * 64])

fc1 = fc_layer(flattened, 7 * 7 * 64, 1024, "fc1")
logits = fc_layer(fc1, 1024, 10, "fc2")

##%%##
with tf.name_scope("cross_entropy"):
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=next_element[1]))
with tf.name_scope("train"):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(next_element[1], 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

##%%
sess = tf.Session()
tf.summary.scalar('cross_entropy', cross_entropy)
tf.summary.scalar('accuracy', accuracy)
tf.summary.image('input', next_element[0])
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter("D:/work/ML/tensorboard/MNIST/deep/4")
writer.add_graph(sess.graph)
##%%
sess.run(tf.global_variables_initializer())
sess.run(training_init_op)
for i in range(600):
    s = sess.run(merged_summary)
    if i % 5 == 0:
        writer.add_summary(s, i)
        print(i, end="\r")
sess.run(valid_init_op)
for i in range(1, 6):
    s1 = sess.run(merged_summary)
    writer.add_summary(s1, 601 + i)

My accuracy and cross-entropy are stuck. After digging in with TensorBoard, the problem seems to be that the weights of my FC layers are stuck at very large values, even though I did initialize them to 0 (if that is really the error). I don't know how to fix this, and if that is not the problem, I don't know what the error is.

[Three TensorBoard screenshots were attached here.]
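For what it's worth, one way to test an exploding-gradient hypothesis directly is to log the gradients themselves, not just the weights. A minimal sketch against the same TF 1.x API as the code above, splitting the single minimize call into compute_gradients/apply_gradients; the names optimizer and grads_and_vars and the summary names are illustrative, not from the original post:

optimizer = tf.train.AdamOptimizer(1e-4)
grads_and_vars = optimizer.compute_gradients(cross_entropy)
for grad, var in grads_and_vars:
    if grad is not None:
        # One histogram per variable's gradient, e.g. "fc1/W_0/gradient".
        tf.summary.histogram(var.name.replace(":", "_") + "/gradient", grad)
train_step = optimizer.apply_gradients(grads_and_vars)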

1 Answer:

Answer 0 (score: 0):

Running sess.run(train_step) after feeding in the input data fixed my code. The training loop above only ever evaluates merged_summary, so train_step is never run and the weights never move from their initial values.
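A minimal sketch of the corrected training loop, reusing the names from the question; the exact placement of the summary writes is illustrative:

sess.run(tf.global_variables_initializer())
sess.run(training_init_op)
for i in range(600):
    # Run the optimizer together with the summaries so the weights are
    # actually updated once per batch.
    _, s = sess.run([train_step, merged_summary])
    if i % 5 == 0:
        writer.add_summary(s, i)
        print(i, end="\r")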