Question

我有一个fruit_train_net.py文件，其中包含以下代码

import tensorflow as tf
import numpy as np
import time
import os
import re

# Start from an empty default graph. This runs before the project imports below
# because the network module creates its placeholders and variables at import time.
# NOTE(review): if that module is already cached by the interpreter (e.g. a
# long-lived IDE kernel), its tensors presumably stay in the previous graph while
# everything created in this file lands in the new one — confirm whether this is
# the source of the "must be from the same graph" error.
tf.reset_default_graph()

from Pythonfiles import fruit_network as network
from Pythonfiles import utils
from Pythonfiles import constants

# default number of training iterations; each iteration trains on one batch
iterations = 500
# number of iterations between loss/accuracy reports; the model is also
# checkpointed at every report
display_interval = 50
# when True, restore the checkpoint recorded in constants.fruit_models_dir
# and continue training from it
useCkpt = False
# placeholder for the dropout keep probability; fed with network.dropout while
# training and with 1 when evaluating loss and accuracy
keep_prob = tf.placeholder(tf.float32, name='keep_prob')


# prepare the input tensors for the network
def inputs(filenames, batch_size):
    """Build shuffled batch tensors from the given record files.

    Args:
        filenames: list of input file paths handed to utils.read_file.
        batch_size: number of examples per dequeued batch.

    Returns:
        A pair (images, labels) of batched tensors produced by
        tf.train.shuffle_batch.
    """
    # presumably read_file yields one decoded (image, label) example — verify in utils
    single_image, single_label = utils.read_file(filenames)
    train_image = utils.adjust_image_for_train(single_image)

    # queue settings: keep at least 5000 examples buffered after each dequeue,
    # and let the queue hold up to 35000 examples plus one batch
    queue_options = dict(
        batch_size=batch_size,
        capacity=35000 + batch_size,
        min_after_dequeue=5000,
        allow_smaller_final_batch=True,
    )
    image_batch, label_batch = tf.train.shuffle_batch([train_image, single_label],
                                                      **queue_options)
    return image_batch, label_batch

#error line
# Build the training graph on top of the placeholders, weights and biases
# defined in the network module. All of these tensors, and keep_prob, must live
# in the same tf.Graph, otherwise this call raises a ValueError.
logits = network.conv_net(network.X, network.weights, network.biases, keep_prob)
# softmax over the network output; used for the accuracy computation below
prediction = tf.nn.softmax(logits)

# mean over the batch of the sparse softmax cross-entropy between the logits
# and the integer labels in network.Y
loss_operation = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                               labels=network.Y))

# the learning rate is read once here, as a plain Python value
optimizer = tf.train.AdamOptimizer(learning_rate=network.learning_rate)
train_op = optimizer.minimize(loss=loss_operation)

# a prediction is correct when the index of the largest output equals the label
correct_prediction = tf.equal(tf.argmax(prediction, 1), network.Y)
# fraction of correct predictions in the batch
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# op that initialises the global variables created above this line
init = tf.global_variables_initializer()


def train_model():
    """Run the training loop, reporting and checkpointing periodically.

    Relies on module-level state created by the script body: the session
    ``sess``, the batch tensors ``images`` and ``labels``, the ``saver``, and
    the graph ops ``train_op``, ``loss_operation`` and ``accuracy``.

    Each iteration dequeues one batch and runs one optimisation step with
    dropout enabled. On the first iteration and every ``display_interval``
    iterations it evaluates loss and accuracy on that same batch with dropout
    disabled, saves a checkpoint and the graph definition, and prints the
    elapsed time since the previous report.
    """
    global learning_rate
    time1 = time.time()
    for i in range(1, iterations + 1):
        # Everything here runs in the graph the session was created with.
        # A fresh tf.Graph() must NOT be made the default inside this loop:
        # nothing below needs one, it would allocate an empty graph on every
        # iteration, and any call that looks up the default graph would see
        # that empty graph instead of the model.
        # NOTE(review): Saver.save appears to export the .meta file from the
        # current default graph — confirm against the TF 1.x Saver docs.
        batch_x, batch_y = sess.run([images, labels])
        # flatten every image to the [batch_size, input_size] layout of network.X
        batch_x = np.reshape(batch_x, [network.batch_size, network.input_size])

        sess.run(train_op, feed_dict={network.X: batch_x, network.Y: batch_y, keep_prob: network.dropout})

        if i % display_interval == 0 or i == 1:
            # keep_prob = 1 disables dropout for the evaluation pass
            loss, acc = sess.run([loss_operation, accuracy], feed_dict={network.X: batch_x, network.Y: batch_y, keep_prob: 1})
            # NOTE(review): the optimizer was built from network.learning_rate,
            # so this module-level value is recorded but never fed back into
            # the optimizer.
            learning_rate = update_learning_rate(acc, learn_rate=network.initial_learning_rate)
            # save the weights and the meta data for the graph
            saver.save(sess, constants.fruit_models_dir + 'model.ckpt')
            tf.train.write_graph(sess.graph_def, constants.fruit_models_dir, 'graph.pbtxt')
            time2 = time.time()
            print("time: %.4f step: %d loss: %.4f accuracy: %.4f" % (time2 - time1, i, loss, acc))
            time1 = time.time()


def update_learning_rate(acc, learn_rate):
    """Return ``learn_rate`` lowered in proportion to the accuracy.

    The amount subtracted is ``acc * learn_rate * 0.9``, so an accuracy of 0
    leaves the rate unchanged and an accuracy of 1 removes 90% of it.

    Args:
        acc: accuracy value used to scale the reduction.
        learn_rate: learning rate to reduce.

    Returns:
        The reduced learning rate.
    """
    reduction = acc * learn_rate * 0.9
    return learn_rate - reduction


# saver used both for periodic checkpoints and for restoring a previous run
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    # input tfrecord files: every entry of data_dir whose name starts with 'train'
    tfrecords_files = [(constants.data_dir + f) for f in os.listdir(constants.data_dir) if re.match('train', f)]
    images, labels = inputs(tfrecords_files, network.batch_size)
    # background threads fill the input queues; the coordinator stops them
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # restore the previously saved value if we wish to continue the training
        if useCkpt:
            ckpt = tf.train.get_checkpoint_state(constants.fruit_models_dir)
            saver.restore(sess, ckpt.model_checkpoint_path)

        train_model()
    finally:
        # always shut the queue-runner threads down, even when restoring or
        # training raises, so they are not left running behind a failed run
        coord.request_stop()
        coord.join(threads)
    # no explicit sess.close(): leaving the with-block closes the session

运行此代码时出现如下错误：

ValueError: Tensor("dropout_23/random_uniform:0", shape=(?, 1024), dtype=float32) must be from the same graph as Tensor("keep_prob:0", dtype=float32).（即：这两个Tensor必须来自同一个图。）

我在spyder中使用了调试器，并逐行运行了代码。

报错的是下面这一行：

#error line
logits = network.conv_net(network.X, network.weights, network.biases, keep_prob)

这一行调用了下面这个文件中的conv_net函数：

import tensorflow as tf
import math
from . import utils
from Pythonfiles import constants



# number of examples per batch; also fixes the shape of the label placeholder Y
batch_size = 50
# length of one flattened image: height * width * depth
input_size = utils.HEIGHT * utils.WIDTH * utils.NETWORK_DEPTH
# number of max pool operations used in the network structure;
# used when calculating the input size for the first fully connected layer
# MUST BE UPDATED if the number of max pool operations changes or if the type of max pool changes
number_of_max_pools = 4
# image sides after pooling: each side divided by 2**number_of_max_pools, rounded up
# NOTE(review): assumes every max pool halves each side — confirm against utils.maxpool2d
new_width = math.ceil(utils.WIDTH/(1 << number_of_max_pools))
new_height = math.ceil(utils.HEIGHT/(1 << number_of_max_pools))
# keep probability fed to the dropout layers during training
dropout = 0.8

# placeholder for input layer: any number of rows, input_size values per row
X = tf.placeholder(tf.float32, [None, input_size], name="X")
# placeholder for actual labels (integer class ids); the shape is fixed, so
# every fed batch must contain exactly batch_size labels
Y = tf.placeholder(tf.int64, [batch_size], name="Y")

# number of activation maps for each convolutional layer
number_of_act_maps_conv1 = 16
number_of_act_maps_conv2 = 32
number_of_act_maps_conv3 = 64
number_of_act_maps_conv4 = 128

# number of outputs for each fully connected layer
number_of_fcl_outputs1 = 1024
number_of_fcl_outputs2 = 256

# learning-rate settings; the training script passes learning_rate to its
# optimizer and initial_learning_rate to its learning-rate update
initial_learning_rate = 0.001
# NOTE(review): final_learning_rate is not referenced in the code shown here
final_learning_rate = 0.00001
learning_rate = initial_learning_rate


def conv_net(X, weights, biases, dropout):
    """Assemble the network graph and return its output-layer tensor.

    The input goes through four convolution + max-pool stages, then two
    fully connected ReLU layers each followed by dropout, then a final
    linear layer.

    Args:
        X: flattened input batch; reshaped here to
            [-1, utils.HEIGHT, utils.WIDTH, utils.NETWORK_DEPTH].
        weights: dict of weight tensors keyed 'conv_weight1'..'conv_weight4',
            'fcl_weight1', 'fcl_weight2' and 'out_weight'.
        biases: dict of bias tensors keyed 'conv_bias1'..'conv_bias4',
            'fcl_bias1', 'fcl_bias2' and 'out_bias'.
        dropout: value passed as the second argument of tf.nn.dropout.

    Returns:
        The output tensor of the last layer. The op is named 'softmax', but no
        softmax is applied inside this function.
    """
    # (conv op name, pool op name, weight key, bias key) for each conv stage
    conv_stages = (
        ('conv1', 'max_pool1', 'conv_weight1', 'conv_bias1'),
        ('conv2', 'max_pool2', 'conv_weight2', 'conv_bias2'),
        ('conv3', 'max_pool3', 'conv_weight3', 'conv_bias3'),
        ('conv4', 'max_pool4', 'conv_weight4', 'conv_bias4'),
    )
    # (weight key, bias key) for each hidden fully connected layer
    dense_stages = (
        ('fcl_weight1', 'fcl_bias1'),
        ('fcl_weight2', 'fcl_bias2'),
    )

    features = tf.reshape(X, shape=[-1, utils.HEIGHT, utils.WIDTH, utils.NETWORK_DEPTH])
    for conv_name, pool_name, weight_key, bias_key in conv_stages:
        features = utils.conv2d(conv_name, features, weights[weight_key], biases[bias_key])
        features = utils.maxpool2d(pool_name, features, k=2)

    # flatten to the row length expected by the first fully connected weight matrix
    flat_size = weights['fcl_weight1'].get_shape().as_list()[0]
    hidden = tf.reshape(features, shape=[-1, flat_size])
    for weight_key, bias_key in dense_stages:
        pre_activation = tf.add(tf.matmul(hidden, weights[weight_key]), biases[bias_key])
        hidden = tf.nn.dropout(tf.nn.relu(pre_activation), dropout)

    return tf.add(tf.matmul(hidden, weights['out_weight']), biases['out_bias'], name='softmax')


# Trainable weights, created once at import time and keyed by the names that
# conv_net looks up.
# NOTE(review): the arguments of utils.variable_with_weight_decay are presumably
# (name, shape, initializer) — confirm in utils.
weights = {
    # convolution stages: [5, 5, inputs, activation maps]
    'conv_weight1': utils.variable_with_weight_decay('conv_weight1', [5, 5, utils.NETWORK_DEPTH, number_of_act_maps_conv1],
                                                     tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32)),
    'conv_weight2': utils.variable_with_weight_decay('conv_weight2', [5, 5, number_of_act_maps_conv1, number_of_act_maps_conv2],
                                                     tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32)),
    'conv_weight3': utils.variable_with_weight_decay('conv_weight3', [5, 5, number_of_act_maps_conv2, number_of_act_maps_conv3],
                                                     tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32)),
    'conv_weight4': utils.variable_with_weight_decay('conv_weight4', [5, 5, number_of_act_maps_conv3, number_of_act_maps_conv4],
                                                     tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32)),
    # first fully connected layer: its input size depends on new_width and
    # new_height, i.e. on number_of_max_pools
    'fcl_weight1': utils.variable_with_weight_decay('fcl_weight1', [new_width * new_height * number_of_act_maps_conv4, number_of_fcl_outputs1],
                                                    tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32)),
    'fcl_weight2': utils.variable_with_weight_decay('fcl_weight2', [number_of_fcl_outputs1, number_of_fcl_outputs2],
                                                    tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32)),
    # output layer: one output per class
    'out_weight': utils.variable_with_weight_decay('out_weight', [number_of_fcl_outputs2, constants.num_classes],
                                                   tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32)),
}
# Zero-initialised biases, one per output of the matching layer.
# NOTE(review): these variables are created without explicit names, so their
# names presumably depend on creation order — keep the order stable if existing
# checkpoints must stay loadable; confirm.
biases = {
    'conv_bias1': tf.Variable(tf.zeros([number_of_act_maps_conv1])),
    'conv_bias2': tf.Variable(tf.zeros([number_of_act_maps_conv2])),
    'conv_bias3': tf.Variable(tf.zeros([number_of_act_maps_conv3])),
    'conv_bias4': tf.Variable(tf.zeros([number_of_act_maps_conv4])),
    'fcl_bias1': tf.Variable(tf.zeros([number_of_fcl_outputs1])),
    'fcl_bias2': tf.Variable(tf.zeros([number_of_fcl_outputs2])),
    'out_bias': tf.Variable(tf.zeros([constants.num_classes]))
}

我不理解这个错误，因为这段代码之前可以正常运行，现在却不行了。

我尝试了相关问题中给出的解决方案，即调用 tf.reset_default_graph()，但没有解决问题。

更新：重新启动Spyder并重启内核后，训练过程可以正常运行了，但现在在模型测试期间又出现了相同的错误。

Answer 1

如错误消息所示，keep_prob属于另一个图。

尝试在定义keep_prob之前先显式创建一个图（tf.Graph），如下所示：

# create one explicit graph and build every op of the model inside it
graph = tf.Graph()
with graph.as_default():
    # ops created inside this block are added to `graph`
    keep_prob = ...

然后，每当您定义应属于该图的操作时，显式重用此图。

Python TensorFlow 训练 - ValueError：Tensor必须与Tensor来自同一个图

1 个答案: