My ConvNet's output is very unusual. When I print the forward-propagation output vector, it is a constant [0, 0, 0, 1] for every label in the dataset. I suspect there is an error in my architecture.
import os
import re
import sys
import tensorflow as tf
import Input
"""
This is a model based on the CIFAR10 Model.
The general structure of the program and a few functions are
borrowed from the TensorFlow example of the CIFAR10 model.
https://github.com/tensorflow/tensorflow/tree/r0.7/tensorflow/models/image/cifar10/
As quoted:
"If you are now interested in developing and training your own image classification
system, we recommend forking this tutorial and replacing components to address your
image classification problem."
Source:
https://www.tensorflow.org/tutorials/deep_cnn/
"""
FLAGS = tf.app.flags.FLAGS
TOWER_NAME = 'tower'
tf.app.flags.DEFINE_integer('batch_size', 1, "Number of images to process per batch.")
tf.app.flags.DEFINE_string('data_dir', 'data', "Path to the input data directory.")
def _activation_summary(x):
    with tf.device('/cpu:0'):
        # Strip the tower prefix so summaries from multi-GPU towers share one name.
        tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
        tf.histogram_summary(tensor_name + '/activations', x)
        tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
def inputs():
    if not FLAGS.data_dir:
        raise ValueError('Source Data Missing')
    data_dir = FLAGS.data_dir
    images, labels = Input.inputs(data_dir=data_dir, batch_size=FLAGS.batch_size)
    return images, labels
def eval_inputs():
    data_dir = FLAGS.data_dir
    images, labels = Input.eval_inputs(data_dir=data_dir, batch_size=1)
    return images, labels
def weight_variable(shape):
    with tf.device('/gpu:0'):
        initial = tf.random_normal(shape, stddev=0.1)
        return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def conv(images, W):
    with tf.device('/gpu:0'):
        return tf.nn.conv2d(images, W, strides=[1, 1, 1, 1], padding='SAME')
def forward_propagation(images):
    with tf.variable_scope('conv1') as scope:
        conv1_feature = weight_variable([20, 20, 3, 20])
        conv1_bias = bias_variable([20])
        image_matrix = tf.reshape(images, [-1, 1686, 1686, 3])
        conv1_result = tf.nn.relu(conv(image_matrix, conv1_feature) + conv1_bias)
        _activation_summary(conv1_result)
    with tf.variable_scope('conv2') as scope:
        conv2_feature = weight_variable([10, 10, 20, 40])
        conv2_bias = bias_variable([40])
        conv2_result = tf.nn.relu(conv(conv1_result, conv2_feature) + conv2_bias)
        _activation_summary(conv2_result)
    conv2_pool = tf.nn.max_pool(conv2_result, ksize=[1, 281, 281, 1], strides=[1, 281, 281, 1], padding='SAME')
    with tf.variable_scope('conv3') as scope:
        conv3_feature = weight_variable([5, 5, 40, 80])
        conv3_bias = bias_variable([80])
        conv3_result = tf.nn.relu(conv(conv2_pool, conv3_feature) + conv3_bias)
        _activation_summary(conv3_result)
    conv3_pool = tf.nn.max_pool(conv3_result, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    with tf.variable_scope('local3') as scope:
        perceptron1_weight = weight_variable([3 * 3 * 80, 10])
        perceptron1_bias = bias_variable([10])
        # Flatten the pooled feature map into one row for the dense layer.
        flatten_dense_connect = tf.reshape(conv3_pool, [1, -1])
        compute_perceptron1_layer = tf.nn.relu(tf.matmul(flatten_dense_connect, perceptron1_weight) + perceptron1_bias)
        _activation_summary(compute_perceptron1_layer)
    with tf.variable_scope('softmax_connect') as scope:
        perceptron3_weight = weight_variable([10, 4])
        perceptron3_bias = bias_variable([4])
        y_conv = tf.nn.softmax(tf.matmul(compute_perceptron1_layer, perceptron3_weight) + perceptron3_bias)
        _activation_summary(y_conv)
    return y_conv
def error(forward_propagation_results, labels):
    with tf.device('/cpu:0'):
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(forward_propagation_results, labels)
        cost = tf.reduce_mean(cross_entropy)
        tf.add_to_collection('losses', cost)
        tf.scalar_summary('LOSS', cost)
        return cost
def train(cost):
    with tf.device('/gpu:0'):
        train_loss = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)
        return train_loss
Answer (score 0):
The main problem is that softmax is applied twice.
Softmax is called at the end of forward_propagation, and its output is then fed to tf.nn.sparse_softmax_cross_entropy_with_logits, which expects raw, unscaled logits and already applies softmax internally. Feeding it already-softmaxed probabilities compresses the effective logits into [0, 1], which leads to the anomalous output.
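As an illustration (not the asker's exact code), here is a minimal sketch of the fix against the same TF 0.x API used above: have forward_propagation return raw logits and let the cross-entropy op apply softmax once. The helper name output_layer and its argument are hypothetical stand-ins for the last block of forward_propagation.

def output_layer(compute_perceptron1_layer):
    # Hypothetical stand-in for the final block of forward_propagation.
    with tf.variable_scope('softmax_connect') as scope:
        perceptron3_weight = weight_variable([10, 4])
        perceptron3_bias = bias_variable([4])
        # No tf.nn.softmax here: sparse_softmax_cross_entropy_with_logits
        # applies softmax internally, so the model must emit raw logits.
        logits = tf.matmul(compute_perceptron1_layer, perceptron3_weight) + perceptron3_bias
        _activation_summary(logits)
        return logits

# Apply softmax once, and only when probabilities are needed at inference time:
# probabilities = tf.nn.softmax(logits)

With this change, error() can consume the returned value unchanged, since it already expects logits.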