I am trying to create a CNN model in TensorFlow, and there is one problem I cannot understand:
My model is a standard 3D model inside a function called convolutional_neural_network.
The configuration is:
self.prediction = self.convolutional_neural_network(self.x, img_sz, n_slices)
self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=self.y_, logits=self.prediction))
optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.cost)
correct_prediction = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(self.y_, 1))
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
where self.y_ is the placeholder that holds the labels.
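The placeholder definitions themselves are not shown in the question; a minimal sketch of how they would typically look (the shapes below are assumptions, not the question's actual values) is:

# Assumed placeholder definitions (not shown in the question); shapes are guesses.
self.x = tf.placeholder(tf.float32, shape=[None, n_slices, img_sz, img_sz])
self.y_ = tf.placeholder(tf.float32, shape=[None, 2])   # one-hot labels, 2 classes
self.keep_prob = tf.placeholder(tf.float32)             # dropout keep probability
self.learning_rate = tf.placeholder(tf.float32)         # fed at each training step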
At some point I try to execute this code:
feed_dict = {self.x: batch_img, self.y_: batch_label,
             self.keep_prob: keep_rate, self.learning_rate: learning_rate}
validation_cost = self.cost.eval(feed_dict=feed_dict)
print "Validation cost: " + str(validation_cost)
validation_cost = self.cost.eval(feed_dict=feed_dict)
print "Validation cost: " + str(validation_cost)
where self.keep_prob is the dropout keep probability of the model.
It should give the same result both times, but it doesn't; it outputs:
Validation cost: 1.05199
Validation cost: 0.115607
Can anyone shed some light on what is happening?
Thanks
PS: I have already removed the "optimizer" line, but it still outputs different results.
Many thanks
EDIT:
My model is as follows:
def convolutional_neural_network(self, x, img_sz, n_slices):
    weights = {
        'W_conv1': self.weight_variable([6, 8, 8, 1, 32]),
        'W_conv2': self.weight_variable([2, 5, 5, 32, 48]),
        'W_fc': self.weight_variable(
            [int(math.ceil(n_slices / 4) * (math.ceil(img_sz / 4) *
                 math.ceil(img_sz / 4) * 80)), 512]),
        'W_fc2': self.weight_variable([512, 256]),
        'out': self.weight_variable([256, 2])
    }
    biases = {
        'b_conv1': self.bias_variable([32]),
        'b_conv2': self.bias_variable([48]),
        'b_fc': self.bias_variable([512]),
        'b_fc2': self.bias_variable([256]),
        'out': self.bias_variable([2])
    }
    self.x_im = tf.reshape(x, shape=[-1, n_slices, img_sz, img_sz, 1])
    conv1 = tf.tanh(self.conv3d(self.x_im, weights['W_conv1']) + biases['b_conv1'])
    conv1 = self.maxpool3d(conv1)
    conv2 = tf.tanh(self.conv3d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = self.maxpool3d(conv2)
    fc = tf.reshape(conv4, [-1, int(math.ceil(n_slices / 8) * math.ceil(img_sz / 8) *
                                    math.ceil(img_sz / 8)) * 80])
    fc = tf.tanh(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.tanh(tf.matmul(fc, weights['W_fc2']) + biases['b_fc2'])
    fc = tf.nn.dropout(fc, self.keep_prob)
    output = tf.matmul(fc, weights['out']) + biases['out']
    return output
where x is a tf.placeholder.
Also note that batch_img and batch_label are numpy arrays.
And:
def weight_variable(self, shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(self, shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
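The conv3d and maxpool3d helpers are not shown in the question; a minimal sketch of typical TF 1.x definitions, assuming stride-1 'SAME' convolutions and 2x2x2 max pooling with stride 2 (which would match the /4 and /8 size reductions above), would be:

# Hypothetical helpers (not in the question); stride-1 'SAME' 3D convolution
# and 2x2x2 max pooling with stride 2, halving each spatial dimension.
def conv3d(self, x, W):
    return tf.nn.conv3d(x, W, strides=[1, 1, 1, 1, 1], padding='SAME')

def maxpool3d(self, x):
    return tf.nn.max_pool3d(x, ksize=[1, 2, 2, 2, 1],
                            strides=[1, 2, 2, 2, 1], padding='SAME')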
The training procedure:
def train_neural_network(self, data_img, labels, batch_size, img_sz, n_slices,
                         last_batch, keep_rate, model_path):
    self.prediction = self.convolutional_neural_network(self.x, img_sz, n_slices)
    self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=self.y_, logits=self.prediction))
    optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.cost)
    correct_prediction = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(self.y_, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    hm_epochs = 1000
    saver = tf.train.Saver(tf.trainable_variables())
    n_epoch = 0
    learning_rate = 1e-4
    model_path_train = 'model_train_3/my_model.ckpt'
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if model_path:
            saver.restore(sess, model_path_train)
        while n_epoch < hm_epochs:
            if len(data_img) > last_batch + batch_size:
                with tf.device('/cpu:0'):
                    #batch_img, batch_label, last_batch = self.get_image(
                    #    data_img, labels, last_batch, batch_size, img_sz, n_slices)
                    batch_img, batch_label, last_batch = self.wrapper_image(
                        data_img, labels, last_batch, batch_size)
                print "Batch label images: " + str(batch_label)
                batch_label = self.dense_to_one_hot(
                    np.array(batch_label, dtype=np.int), 2).astype(np.float32)
            ####### at the end of EACH EPOCH ###
            n_epoch += 1
            print "n_epoch: " + str(n_epoch)
            _, c = sess.run([optimizer, self.cost], feed_dict={
                self.x: batch_img, self.y_: batch_label,
                self.keep_prob: keep_rate, self.learning_rate: learning_rate})
            c = self.cost.eval(feed_dict={
                self.x: batch_img, self.y_: batch_label,
                self.keep_prob: keep_rate, self.learning_rate: learning_rate})
            print "train cost: " + str(c)
            c = self.cost.eval(feed_dict={
                self.x: batch_img, self.y_: batch_label,
                self.keep_prob: keep_rate, self.learning_rate: learning_rate})
            print "train cost: " + str(c)
            if model_path:
                saver.save(sess, model_path_train)
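The dense_to_one_hot helper is also not shown; a common NumPy implementation (an assumption, matching how it is called above) would be:

# Hypothetical implementation (not shown in the question); converts integer
# class labels to one-hot rows, matching the call dense_to_one_hot(labels, 2).
def dense_to_one_hot(self, labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot[np.arange(num_labels), labels_dense.ravel()] = 1
    return labels_one_hot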
Answer 0 (score: 0):
Usually this happens because variables are not being saved and restored correctly, or, if batch normalization is used, because of the data. The consequence is that the unrestored variables are initialized with random weights on every run, giving different results each time.
As a quick test, insert tf.set_random_seed(1) right after all the imports and see whether that fixes the values across different runs. If it does, my theory is more likely to be valid. Then try printing the mean and standard deviation of each layer's activations, and you will see where the two runs start to diverge.
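A minimal sketch of that debugging suggestion. Note that in the question's code conv1, conv2 and fc are locals inside the model function, so exposing them as attributes (e.g. self.conv1 = conv1) is an assumed modification, and "model" stands for the class instance:

# Fix the graph-level seed right after the imports so variable
# initialization is reproducible across runs.
import numpy as np
import tensorflow as tf
tf.set_random_seed(1)

# Hypothetical inspection loop; assumes the layer tensors were saved as
# attributes so they can be evaluated here with the same feed_dict.
for name, tensor in [('conv1', model.conv1), ('conv2', model.conv2), ('fc', model.fc)]:
    mean_op, var_op = tf.nn.moments(tensor, axes=list(range(len(tensor.get_shape()))))
    mean, var = sess.run([mean_op, var_op], feed_dict=feed_dict)
    print name + " mean: " + str(mean) + " std: " + str(np.sqrt(var))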
Answer 1 (score: 0):
Fixed it. I just realized it was the dropout keep probability: it was set to 0.5, so the model activates a different subnetwork of the model on each evaluation of the cost. If I set keep_prob = 1.0, the output is the same across two consecutive runs. Thanks Peter for the help.
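A minimal sketch of the fix, reusing the feed dictionaries from the question: keep dropout active only during training, and feed a keep probability of 1.0 whenever the cost is evaluated.

# Dropout stays on for the training step only.
train_feed = {self.x: batch_img, self.y_: batch_label,
              self.keep_prob: keep_rate, self.learning_rate: learning_rate}
# With keep_prob = 1.0 the full network is used, so evaluation is deterministic.
eval_feed = {self.x: batch_img, self.y_: batch_label,
             self.keep_prob: 1.0, self.learning_rate: learning_rate}

_, c = sess.run([optimizer, self.cost], feed_dict=train_feed)
validation_cost = self.cost.eval(feed_dict=eval_feed)
print "Validation cost: " + str(validation_cost)  # now identical across calls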