I am running into a strange problem when doing inference with my trained TensorFlow model.
First, the model:
import math

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
sess = tf.Session()
def model(x_in,pkeep):
    epsilon = 1e-3
    # 1. 6x6 Conv2D (8,121,1) -> (8,121,6)  with ReLU
    # 2. 5x5 Conv2D (8,121,6) -> (8,61,12)  with ReLU (stride 2 along the width)
    # 3. 4x4 Conv2D (8,61,12) -> (8,31,24)  with ReLU (stride 2 along the width)
    # 4. 4x4 Conv2D (8,31,24) -> (8,31,48)  with ReLU
    # 5. 3x3 Conv2D (8,31,48) -> (8,31,96)  with ReLU
    # 6. Flatten    (8,31,96) -> (23808,)
    # 7. FC Layer   (23808,)  -> (200,)     with batch norm, ReLU and dropout
    # 8. FC Layer   (200,)    -> (4,)
    K = 6    # first convolutional layer output depth
    L = 12   # second convolutional layer output depth
    M = 24   # third convolutional layer output depth
    N = 48   # fourth convolutional layer output depth
    O = 96   # fifth convolutional layer output depth
    S = 200  # fully connected layer size
    numClasses = 4
with tf.name_scope("conv1"):
#wc1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1), name="wc1")
wc1 = tf.get_variable(shape=[6, 6, 1, K],initializer=tf.contrib.layers.xavier_initializer(), name="wc1")
bc1 = tf.Variable(tf.constant(0.1, shape=[K]), name="bc1")
conv1 = tf.nn.conv2d(x_in, wc1, strides=[1, 1, 1, 1], padding="SAME")
conv1 = tf.nn.relu(conv1 + bc1)
tf.summary.histogram("weights_conv_1", wc1)
tf.summary.histogram("biases_conv_1", bc1)
tf.summary.histogram("activations", conv1)
# Output: 28x28xK
with tf.name_scope("conv2"):
#wc2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1), name="wc2")
wc2 = tf.get_variable(shape=[5, 5, K, L],initializer=tf.contrib.layers.xavier_initializer(), name="wc2")
bc2 = tf.Variable(tf.constant(0.1, shape=[L]), name="bc2")
conv2 = tf.nn.conv2d(conv1, wc2, strides=[1, 1, 2, 1], padding="SAME")
conv2 = tf.nn.relu(conv2 + bc2)
tf.summary.histogram("weights_conv_2", wc2)
tf.summary.histogram("biases_conv_2", bc2)
tf.summary.histogram("activations", conv2)
# Output: 14x14xL
with tf.name_scope("conv3"):
#wc3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1), name="wc3")
wc3 = tf.get_variable(shape=[4, 4, L, M],initializer=tf.contrib.layers.xavier_initializer(),name="wc3")
bc3 = tf.Variable(tf.constant(0.1, shape=[M]), name="bc3")
conv3 = tf.nn.conv2d(conv2, wc3, strides=[1, 1, 2, 1], padding="SAME")
conv3 = tf.nn.relu(conv3 + bc3)
tf.summary.histogram("weights_conv_3", wc3)
tf.summary.histogram("biases_conv_3", bc3)
tf.summary.histogram("activations", conv3)
with tf.name_scope("conv4"):
#wc4 = tf.Variable(tf.truncated_normal([4, 4, M, N], stddev=0.1), name="wc4")
wc4 = tf.get_variable(shape=[4, 4, M, N],initializer=tf.contrib.layers.xavier_initializer(), name="wc4")
bc4 = tf.Variable(tf.constant(0.1, shape=[N]), name="bc4")
conv4 = tf.nn.conv2d(conv3, wc4, strides=[1, 1, 1, 1], padding="SAME")
conv4 = tf.nn.relu(conv4 + bc4)
tf.summary.histogram("weights_conv_4", wc4)
tf.summary.histogram("biases_conv_4", bc4)
tf.summary.histogram("activations", conv4)
with tf.name_scope("conv5"):
#wc5 = tf.Variable(tf.truncated_normal([3, 3, N, O], stddev=0.1), name="wc5")
wc5 = tf.get_variable(shape=[3, 3, N, O],initializer=tf.contrib.layers.xavier_initializer(), name="wc5")
bc5 = tf.Variable(tf.constant(0.1, shape=[O]), name="bc5")
conv5 = tf.nn.conv2d(conv4, wc5, strides=[1, 1, 1, 1], padding="SAME")
conv5 = tf.nn.relu(conv5 + bc5)
tf.summary.histogram("weights_conv_5", wc5)
tf.summary.histogram("biases_conv_5", bc5)
tf.summary.histogram("activations", conv5)
# Output: 7x7xM
flattened = tf.reshape(conv5, [-1, 8 * 31 * O]) #7 * 7 * M
with tf.name_scope("fc1"):
#wf1 = tf.Variable(tf.truncated_normal([8 * 31 * O, S], stddev=0.1), name="wf1")
wf1 = tf.get_variable(shape=[8 * 31 * O, S],initializer=tf.contrib.layers.xavier_initializer(), name="wf1")
bf1 = tf.Variable(tf.constant(0.1, shape=[S]), name="bf1")
fc1 = tf.matmul(flattened, wf1) + bf1
batch_mean2, batch_var2 = tf.nn.moments(fc1,[0])
scale1 = tf.Variable(tf.ones([S]))
beta1 = tf.Variable(tf.zeros([S]))
BN2 = tf.nn.batch_normalization(fc1,batch_mean2,batch_var2,beta1,scale1,epsilon)
relu = tf.nn.relu(BN2)
dropout = tf.nn.dropout(relu,pkeep,name="dropout")
tf.summary.histogram("weights_fc_1", wf1)
tf.summary.histogram("biases_conv_1", bf1)
tf.summary.histogram("activations", fc1)
with tf.name_scope("fc2"):
#wf2 = tf.Variable(tf.truncated_normal([S, numClasses], stddev=0.1), name="wf2")
wf2 = tf.get_variable(shape=[S, numClasses],initializer=tf.contrib.layers.xavier_initializer(), name="wf2")
bf2 = tf.Variable(tf.constant(0.1, shape=[numClasses]), name="bf2")
logits = tf.matmul(dropout, wf2) + bf2
tf.summary.histogram("weights_fc_2", wf2)
tf.summary.histogram("biases_conv_2", bf2)
tf.summary.histogram("activations", logits)
return logits
def compute_cost(logits, labels):
    with tf.name_scope("loss"):
        xent = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels), name="xent")
        tf.summary.scalar("loss", xent)
    return xent
x = tf.placeholder(tf.float32, shape=[None, 968], name="x")
x_image = tf.reshape(x, [-1, 8, 121, 1])
y = tf.placeholder(tf.float32, shape=[None, 4], name="y")
pkeep = tf.placeholder(tf.float32,name="pkeep")
step = tf.placeholder(tf.int32,name="step")
learning_rate = 0.0003
logits = tf.placeholder(tf.float32,name="logits")
logits = model(x_image,pkeep)
cost = compute_cost(logits,y)
with tf.name_scope("train"):
lr = learning_rate + tf.train.exponential_decay(0.003, step, 2000, 1/math.e)
optimizer = tf.train.AdamOptimizer(lr).minimize(cost)
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),name="accuracyV")
tf.summary.scalar("accuracy", accuracy)
tf.add_to_collection("accuracy", accuracy)
Now I am trying to run an inference test with a single example from the test set (during training I got roughly 95% accuracy). This is my code:
n = 0
with tf.Session() as sess:
    new_saver.restore(sess, tf.train.latest_checkpoint('C:\\...'))
    predictions = sess.run(logits, feed_dict={x: x_test[n,:][np.newaxis], pkeep: 1.0})
    pred_max = np.argmax(predictions, 1)
    print(pred_max)
    print(y_test[n])
The result is that I always get the same prediction, independent of the y_test label: class 0, with nearly identical logits every time (0.249904 -0.0239285 -0.0824487 0.0551006). The strange part is that when I run the inference test on several images at the same time, I get completely different values and the correct predictions:
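To illustrate the difference between the two cases, here is a small standalone sketch (separate from my model, with made-up data) of what the per-batch statistics used in the fc1 block return for a batch of size 1 versus a batch of size 10:

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
fc = tf.placeholder(tf.float32, shape=[None, 3], name="fc")
mean, var = tf.nn.moments(fc, [0])  # same call as in the fc1 block of the model

data = np.random.randn(10, 3).astype(np.float32)
with tf.Session() as sess:
    # single example: the mean equals the example itself and the variance is all zeros
    print(sess.run([mean, var], feed_dict={fc: data[0:1]}))
    # batch of 10: non-degenerate statistics
    print(sess.run([mean, var], feed_dict={fc: data}))

My actual batched inference test looks like this: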
with tf.Session() as sess:
    new_saver.restore(sess, tf.train.latest_checkpoint('C:\\...'))
    predictions = sess.run(logits, feed_dict={x: x_test[0:10,:], pkeep: 1.0})
    pred_max = np.argmax(predictions, 1)
    print(pred_max)
    print(y_test[0:10])
Here the logits for the same example (index = 0) are now -6.45629 3.54225 3.75237 -7.38902. These values make much more sense to me, and they also lead to the correct class.
Can anyone explain this strange behavior?