here is the output of AlexNet, I mean it's the output of the third full-connection
我不知道为什么所有输出都为零。我尝试减少图层,同时剪切最后两层,在训练几个步骤后,每个维度的输出都是相同的。任何想法将不胜感激。 这是主要的推理代码和初始值:
import tensorflow as tf
import numpy as np
import os
import csv
import cifar10
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
IMAGE_SIZES = 32
IMAGE_CHANELS = 3
NUM_CLASSES = 10
FIRST_CONV_NUM = 64
SECOND_CONV_NUM = 192
THIRD_CONV_NUM = 384
FOURTH_CONV_NUM = 256
FIFTH_CONV_NUM = 256
MAX_POOL_SIZE = 3
BATCH_SIZE = 100
FIRST_FC_UNIT_NUM = 4096
SECOND_FC_UNIT_NUM = 1000
DROP_OUT_PRO = 0.5
THIRD_FC_UNIT_NUM = NUM_CLASSES
TRAIN_EPOCH = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
NUM_EPOCHS_PER_DECAY = 350.0
LEARNING_RATE_DECAY_FACTOR = 0.1
INITIAL_LEARNING_RATE = 0.01
DISPLAY_STEPS = 5
def leaky_relu(x, alpha=0.0):
return tf.nn.relu(x) - alpha * tf.nn.relu(-x)
def activation(x,alpha=0.0):
if alpha > 0:
return leaky_relu(x,alpha)
else:
return tf.nn.relu(x)
def Alex_Weight(weight_name, weight_shape, weight_stddev, weight_type):
Weight = tf.truncated_normal(shape=weight_shape, stddev=weight_stddev,type=weight_type)
return tf.Variable(initial_value=Weight, trainable=True, name=weight_name)
def Alex_Bias(bias_name, bias_shape, bias_type, bias_init=0.1):
initial = tf.constant(bias_init, shape=bias_shape)
return tf.Variable(initial, trainable=True, dtype=bias_type,name=bias_name)
def Alex_AddActivationSummary(out):
tf.summary.histogram('/activations',out)
tf.summary.scalar('/sparsity',tf.nn.zero_fraction(out))
def Alex_Conv(conv_name, input, weight, bias, strides, alpha=0.1,padding="SAME", activation=activation, act_name="ReLU"):
with tf.name_scope(conv_name):
conv = tf.nn.conv2d(input, weight, [1, strides, strides, 1], padding)
pre_activation = tf.nn.bias_add(conv, bias)
with tf.name_scope(act_name):
conv = activation(pre_activation,alpha=alpha)
return conv
def Alex_Pool(conv, ksize, strides, pool_fuction=tf.nn.max_pool, padding="SAME"):
return pool_fuction(conv, [1, ksize, ksize, 1], [1, strides,strides, 1], padding)
def Alex_Fully_Connect( input, weight, bias, activation=tf.nn.relu, act_name="ReLU"):
with tf.name_scope("Wx_b"):
y = tf.add(tf.matmul(input, weight), bias)
with tf.name_scope(act_name):
fc = activation(y, act_name)
return fc
def Alex_Norm(norm_name, pool):
with tf.name_scope(norm_name):
norm = tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
name=norm_name)
return norm
def Alex_Inference(images):
with tf.name_scope("First_Conv"):
W1 = Alex_Weight("Fist_Conv_Weight", [5, 5, IMAGE_CHANELS, FIRST_CONV_NUM], weight_stddev=0.01,
weight_type=tf.float32)
bias1 = Alex_Bias("First_Conv_Bias", [FIRST_CONV_NUM], tf.float32,bias_init=0.0)
first_conv = Alex_Conv("First_Conv", images, W1, bias1, strides=1, padding="SAME")
Alex_AddActivationSummary(first_conv)
with tf.name_scope('lrn1') as scope:
lrn1 = tf.nn.local_response_normalization(first_conv,
alpha=1e-4,
beta=0.75,
depth_radius=2,
bias=2.0)
with tf.name_scope("First_Pool"):
first_pool = Alex_Pool(lrn1, MAX_POOL_SIZE, strides=2, padding="VALID")
with tf.name_scope("Second_Conv"):
W2 = Alex_Weight("Second_Conv_Weight", [5, 5, FIRST_CONV_NUM, SECOND_CONV_NUM], weight_stddev=0.01,
weight_type=tf.float32)
bias2 = Alex_Bias("Second_Conv_Bias", [SECOND_CONV_NUM], tf.float32,bias_init=1.0)
second_conv = Alex_Conv("Second_Conv", first_pool, W2, bias2, strides=1, padding="SAME")
Alex_AddActivationSummary(second_conv)
with tf.name_scope('lrn2') as scope:
lrn2 = tf.nn.local_response_normalization(second_conv,
alpha=1e-4,
beta=0.75,
depth_radius=2,
bias=2.0)
with tf.name_scope("Second_Pool"):
second_pool = Alex_Pool(lrn2, MAX_POOL_SIZE, strides=2, padding="VALID")
with tf.name_scope("Third_Conv"):
W3 = Alex_Weight("Third_Conv_Weight", [3, 3, SECOND_CONV_NUM, THIRD_CONV_NUM], weight_stddev=0.01,
weight_type=tf.float32)
bias3 = Alex_Bias("Third_Conv_Bias", [THIRD_CONV_NUM], tf.float32,bias_init=0.0)
third_conv = Alex_Conv("Third_Conv", second_pool, W3, bias3, strides=1, padding="SAME")
Alex_AddActivationSummary(third_conv)
with tf.name_scope("Fourth_Conv"):
W4 = Alex_Weight("Fourth_Conv_Weight", [3, 3, THIRD_CONV_NUM, FOURTH_CONV_NUM], weight_stddev=0.01,
weight_type=tf.float32)
bias4 = Alex_Bias("Fourth_Conv_Bias", [FOURTH_CONV_NUM], tf.float32,bias_init=1.0)
fourth_conv = Alex_Conv("Fourth_Conv", third_conv, W4, bias4, strides=1, padding="SAME")
Alex_AddActivationSummary(fourth_conv)
with tf.name_scope("Fifth_Conv"):
W5 = Alex_Weight("Fifth_Conv_Weight", [3, 3, FOURTH_CONV_NUM, FIFTH_CONV_NUM], weight_stddev=0.01,
weight_type=tf.float32)
bias5 = Alex_Bias("Fifth_Conv_Bias", [FIFTH_CONV_NUM], tf.float32,bias_init=1.0)
fifth_conv = Alex_Conv("Fifth_Conv", fourth_conv, W5, bias5, strides=1, padding="SAME")
Alex_AddActivationSummary(fifth_conv)
with tf.name_scope("Third_Pool"):
third_pool = Alex_Pool(fifth_conv, MAX_POOL_SIZE, strides=2, padding="VALID")
with tf.name_scope("Flatten"):
flatten = tf.reshape(third_pool, [BATCH_SIZE, -1])
flatten_dim = flatten.get_shape()[1].value
with tf.name_scope("First_Fully_Connection"):
W = Alex_Weight("Fist_FC_Weight", [flatten_dim, FIRST_FC_UNIT_NUM], weight_stddev=4e-2, weight_type=tf.float32)
bias = Alex_Bias("First_FC_Bias", [FIRST_FC_UNIT_NUM], tf.float32, bias_init=1.0)
fc1 = Alex_Fully_Connect(flatten, W, bias, activation=tf.nn.relu, act_name="ReLU")
Alex_AddActivationSummary(fc1)
with tf.name_scope("Drop_Out_1"):
drop_out_1 = tf.nn.dropout(fc1, DROP_OUT_PRO)
with tf.name_scope("Second_Fully_Connection"):
W = Alex_Weight("Second_FC_Weight", [FIRST_FC_UNIT_NUM, SECOND_FC_UNIT_NUM], weight_stddev=4e-2,
weight_type=tf.float32)
bias = Alex_Bias("Second_FC_Bias", [SECOND_FC_UNIT_NUM], tf.float32, bias_init=1.0)
fc2 = Alex_Fully_Connect(drop_out_1, W, bias, activation=tf.nn.relu, act_name="ReLU")
Alex_AddActivationSummary(fc2)
with tf.name_scope("Drop_Out_2"):
drop_out_2 = tf.nn.dropout(fc2, DROP_OUT_PRO)
with tf.name_scope("Third_Fully_Connection"):
W = Alex_Weight("Third_FC_Weight", [SECOND_FC_UNIT_NUM, THIRD_FC_UNIT_NUM], weight_stddev=1/SECOND_FC_UNIT_NUM,
weight_type=tf.float32)
bias = Alex_Bias("Third_FC_Bias", [THIRD_FC_UNIT_NUM], tf.float32,bias_init=1.0)
fc3 = Alex_Fully_Connect(drop_out_2, W, bias, activation=tf.nn.relu, act_name="ReLU")
Alex_AddActivationSummary(fc3)
return fc3