TensorFlow: AlexNet's output becomes all zeros after a few training steps, and sometimes every dimension of the output is identical

Date: 2017-11-02 10:29:16

Tags: tensorflow output

Here is the output of AlexNet; by that I mean the output of the third fully-connected layer.

I don't know why all the outputs are zero. I also tried shrinking the network by cutting off the last two layers, and after a few training steps the output became identical in every dimension. Any ideas would be appreciated. Here is the main inference code and the initialization values:

import tensorflow as tf
import numpy as np
import os
import csv
import cifar10
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

IMAGE_SIZES = 32
IMAGE_CHANELS = 3
NUM_CLASSES = 10
FIRST_CONV_NUM = 64
SECOND_CONV_NUM = 192
THIRD_CONV_NUM = 384
FOURTH_CONV_NUM = 256
FIFTH_CONV_NUM = 256
MAX_POOL_SIZE = 3
BATCH_SIZE = 100
FIRST_FC_UNIT_NUM = 4096
SECOND_FC_UNIT_NUM = 1000
DROP_OUT_PRO = 0.5
THIRD_FC_UNIT_NUM = NUM_CLASSES
TRAIN_EPOCH = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
NUM_EPOCHS_PER_DECAY = 350.0
LEARNING_RATE_DECAY_FACTOR = 0.1
INITIAL_LEARNING_RATE = 0.01
DISPLAY_STEPS = 5

def leaky_relu(x, alpha=0.0):
    # Equivalent to max(x, 0) + alpha * min(x, 0), i.e. a standard leaky ReLU.
    return tf.nn.relu(x) - alpha * tf.nn.relu(-x)

def activation(x,alpha=0.0):
    if alpha > 0:
        return leaky_relu(x,alpha)
    else:
        return tf.nn.relu(x)
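
As an aside, the relu(x) - alpha * relu(-x) form above is just the standard leaky ReLU max(x, alpha * x) written with two ReLUs. A quick numpy sanity check (purely illustrative, not part of the original code):

import numpy as np

x = np.array([-2.0, -0.5, 0.0, 1.5])
alpha = 0.1
lhs = np.maximum(x, 0) - alpha * np.maximum(-x, 0)  # relu(x) - alpha * relu(-x)
rhs = np.where(x > 0, x, alpha * x)                 # piecewise leaky ReLU
assert np.allclose(lhs, rhs)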


def Alex_Weight(weight_name, weight_shape, weight_stddev, weight_type):
    # Truncated-normal initializer for a weight tensor of the given shape and dtype.
    weight = tf.truncated_normal(shape=weight_shape, stddev=weight_stddev, dtype=weight_type)
    return tf.Variable(initial_value=weight, trainable=True, name=weight_name)

def Alex_Bias(bias_name, bias_shape, bias_type, bias_init=0.1):
    # Constant initializer for a bias vector.
    initial = tf.constant(bias_init, shape=bias_shape)
    return tf.Variable(initial, trainable=True, dtype=bias_type, name=bias_name)


def Alex_AddActivationSummary(out):
    tf.summary.histogram('/activations',out)
    tf.summary.scalar('/sparsity',tf.nn.zero_fraction(out))
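
The sparsity scalar above is the right diagnostic for the symptom in the question: if tf.nn.zero_fraction of a layer climbs toward 1.0 in TensorBoard, every ReLU unit in that layer is outputting zero ("dead" ReLUs). A minimal self-contained illustration of what it measures (assumes TensorFlow 1.x, like the rest of the code):

import tensorflow as tf

x = tf.constant([[-1.0, 0.0, 2.0, -3.0]])
frac = tf.nn.zero_fraction(tf.nn.relu(x))
with tf.Session() as sess:
    print(sess.run(frac))  # 0.75: three of the four units are zero after the ReLU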

def Alex_Conv(conv_name, input, weight, bias, strides, alpha=0.1, padding="SAME", activation=activation, act_name="ReLU"):
    with tf.name_scope(conv_name):
        conv = tf.nn.conv2d(input, weight, [1, strides, strides, 1], padding)
        pre_activation = tf.nn.bias_add(conv, bias)
    with tf.name_scope(act_name):
        conv = activation(pre_activation, alpha=alpha)
    return conv

def Alex_Pool(conv, ksize, strides, pool_function=tf.nn.max_pool, padding="SAME"):
    return pool_function(conv, [1, ksize, ksize, 1], [1, strides, strides, 1], padding)
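
With VALID padding, a 3x3/stride-2 pool maps spatial size n to floor((n - 3) / 2) + 1, so the 32x32 CIFAR-10 inputs shrink 32 -> 15 -> 7 -> 3 across the three pooling layers below. A shape check (illustrative only; reuses the constants and Alex_Pool defined above):

x = tf.zeros([BATCH_SIZE, IMAGE_SIZES, IMAGE_SIZES, FIRST_CONV_NUM])
p = Alex_Pool(x, ksize=MAX_POOL_SIZE, strides=2, padding="VALID")
print(p.get_shape())  # (100, 15, 15, 64): floor((32 - 3) / 2) + 1 = 15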

def Alex_Fully_Connect(input, weight, bias, activation=tf.nn.relu, act_name="ReLU"):
    with tf.name_scope("Wx_b"):
        y = tf.add(tf.matmul(input, weight), bias)
    with tf.name_scope(act_name):
        fc = activation(y, name=act_name)
    return fc

def Alex_Norm(norm_name, pool):
    with tf.name_scope(norm_name):
        norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name=norm_name)
    return norm

def Alex_Inference(images):
    with tf.name_scope("First_Conv"):
        W1 = Alex_Weight("First_Conv_Weight", [5, 5, IMAGE_CHANELS, FIRST_CONV_NUM], weight_stddev=0.01,
                        weight_type=tf.float32)
        bias1 = Alex_Bias("First_Conv_Bias", [FIRST_CONV_NUM], tf.float32, bias_init=0.0)
        first_conv = Alex_Conv("First_Conv", images, W1, bias1, strides=1, padding="SAME")
        Alex_AddActivationSummary(first_conv)

    with tf.name_scope('lrn1') as scope:
        lrn1 = tf.nn.local_response_normalization(first_conv,
                                                  alpha=1e-4,
                                                  beta=0.75,
                                                  depth_radius=2,
                                                  bias=2.0)

    with tf.name_scope("First_Pool"):
        first_pool = Alex_Pool(lrn1, MAX_POOL_SIZE, strides=2, padding="VALID")


    with tf.name_scope("Second_Conv"):
        W2 = Alex_Weight("Second_Conv_Weight", [5, 5, FIRST_CONV_NUM, SECOND_CONV_NUM], weight_stddev=0.01,
                        weight_type=tf.float32)
        bias2 = Alex_Bias("Second_Conv_Bias", [SECOND_CONV_NUM], tf.float32, bias_init=1.0)
        second_conv = Alex_Conv("Second_Conv", first_pool, W2, bias2, strides=1, padding="SAME")
        Alex_AddActivationSummary(second_conv)

    with tf.name_scope('lrn2') as scope:
        lrn2 = tf.nn.local_response_normalization(second_conv,
                                                  alpha=1e-4,
                                                  beta=0.75,
                                                  depth_radius=2,
                                                  bias=2.0)

    with tf.name_scope("Second_Pool"):
        second_pool = Alex_Pool(lrn2, MAX_POOL_SIZE, strides=2, padding="VALID")

    with tf.name_scope("Third_Conv"):
        W3 = Alex_Weight("Third_Conv_Weight", [3, 3, SECOND_CONV_NUM, THIRD_CONV_NUM], weight_stddev=0.01,
                        weight_type=tf.float32)
        bias3 = Alex_Bias("Third_Conv_Bias", [THIRD_CONV_NUM], tf.float32, bias_init=0.0)
        third_conv = Alex_Conv("Third_Conv", second_pool, W3, bias3, strides=1, padding="SAME")
        Alex_AddActivationSummary(third_conv)

    with tf.name_scope("Fourth_Conv"):
        W4 = Alex_Weight("Fourth_Conv_Weight", [3, 3, THIRD_CONV_NUM, FOURTH_CONV_NUM], weight_stddev=0.01,
                        weight_type=tf.float32)
        bias4 = Alex_Bias("Fourth_Conv_Bias", [FOURTH_CONV_NUM], tf.float32, bias_init=1.0)
        fourth_conv = Alex_Conv("Fourth_Conv", third_conv, W4, bias4, strides=1, padding="SAME")
        Alex_AddActivationSummary(fourth_conv)

    with tf.name_scope("Fifth_Conv"):
        W5 = Alex_Weight("Fifth_Conv_Weight", [3, 3, FOURTH_CONV_NUM, FIFTH_CONV_NUM], weight_stddev=0.01,
                        weight_type=tf.float32)
        bias5 = Alex_Bias("Fifth_Conv_Bias", [FIFTH_CONV_NUM], tf.float32, bias_init=1.0)
        fifth_conv = Alex_Conv("Fifth_Conv", fourth_conv, W5, bias5, strides=1, padding="SAME")
        Alex_AddActivationSummary(fifth_conv)

    with tf.name_scope("Third_Pool"):
        third_pool = Alex_Pool(fifth_conv, MAX_POOL_SIZE, strides=2, padding="VALID")

    with tf.name_scope("Flatten"):
        flatten = tf.reshape(third_pool, [BATCH_SIZE, -1])
        flatten_dim = flatten.get_shape()[1].value

    with tf.name_scope("First_Fully_Connection"):
        W = Alex_Weight("First_FC_Weight", [flatten_dim, FIRST_FC_UNIT_NUM], weight_stddev=4e-2, weight_type=tf.float32)
        bias = Alex_Bias("First_FC_Bias", [FIRST_FC_UNIT_NUM], tf.float32, bias_init=1.0)
        fc1 = Alex_Fully_Connect(flatten, W, bias, activation=tf.nn.relu, act_name="ReLU")
        Alex_AddActivationSummary(fc1)

    with tf.name_scope("Drop_Out_1"):
        drop_out_1 = tf.nn.dropout(fc1, DROP_OUT_PRO)

    with tf.name_scope("Second_Fully_Connection"):
        W = Alex_Weight("Second_FC_Weight", [FIRST_FC_UNIT_NUM, SECOND_FC_UNIT_NUM], weight_stddev=4e-2,
                        weight_type=tf.float32)
        bias = Alex_Bias("Second_FC_Bias", [SECOND_FC_UNIT_NUM], tf.float32, bias_init=1.0)
        fc2 = Alex_Fully_Connect(drop_out_1, W, bias, activation=tf.nn.relu, act_name="ReLU")
        Alex_AddActivationSummary(fc2)

    with tf.name_scope("Drop_Out_2"):
        drop_out_2 = tf.nn.dropout(fc2, DROP_OUT_PRO)

    with tf.name_scope("Third_Fully_Connection"):
        W = Alex_Weight("Third_FC_Weight", [SECOND_FC_UNIT_NUM, THIRD_FC_UNIT_NUM], weight_stddev=1.0/SECOND_FC_UNIT_NUM,
                        weight_type=tf.float32)
        bias = Alex_Bias("Third_FC_Bias", [THIRD_FC_UNIT_NUM], tf.float32, bias_init=1.0)
        fc3 = Alex_Fully_Connect(drop_out_2, W, bias, activation=tf.nn.relu, act_name="ReLU")
        Alex_AddActivationSummary(fc3)

    return fc3
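
For context, here is a minimal sketch of how an inference function like this is typically driven during training; the placeholders, loss, and optimizer below are my assumptions, not part of the question. Note that softmax cross-entropy losses expect raw pre-activation logits, whereas fc3 above has already passed through a ReLU.

# Hypothetical training harness; everything here is an assumption for illustration.
images = tf.placeholder(tf.float32, [BATCH_SIZE, IMAGE_SIZES, IMAGE_SIZES, IMAGE_CHANELS])
labels = tf.placeholder(tf.int64, [BATCH_SIZE])

logits = Alex_Inference(images)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
train_op = tf.train.GradientDescentOptimizer(INITIAL_LEARNING_RATE).minimize(loss)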

0 Answers:

No answers yet.