I am trying to create a CNN model in TensorFlow, and there is one problem I cannot understand:
My model is a standard 3D model inside a function called convolutional_neural_network.
The configuration is:
self.prediction = self.convolutional_neural_network(self.x, img_sz, n_slices)
self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    labels=self.y_, logits=self.prediction))
optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.cost)
correct_prediction = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(self.y_, 1))
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
where self.y_ is the placeholder that holds the labels.
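The placeholder definitions themselves are not shown in the question; a minimal sketch of how they would typically look (the shapes below are assumptions, not the question's actual values) is:

# Assumed placeholder definitions (not shown in the question); shapes are guesses.
self.x = tf.placeholder(tf.float32, shape=[None, n_slices, img_sz, img_sz])
self.y_ = tf.placeholder(tf.float32, shape=[None, 2])   # one-hot labels, 2 classes
self.keep_prob = tf.placeholder(tf.float32)             # dropout keep probability
self.learning_rate = tf.placeholder(tf.float32)         # fed at each training step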
At some point I try to execute this code:
feed_dict = {self.x: batch_img, self.y_: batch_label,
             self.keep_prob: keep_rate, self.learning_rate: learning_rate}
validation_cost = self.cost.eval(feed_dict=feed_dict)
print "Validation cost: " + str(validation_cost)
validation_cost = self.cost.eval(feed_dict=feed_dict)
print "Validation cost: " + str(validation_cost)
where self.keep_prob is the dropout keep probability of the model.
It should give the same result both times, but it doesn't; it outputs:
Validation cost: 1.05199
Validation cost: 0.115607
Can anyone shed some light on what is happening?
Thanks
PS: I have already removed the "optimizer" line, but it still outputs different results.
Many thanks
EDIT:
My model is as follows:
def convolutional_neural_network(self, x, img_sz, n_slices):
    weights = {
        'W_conv1': self.weight_variable([6, 8, 8, 1, 32]),
        'W_conv2': self.weight_variable([2, 5, 5, 32, 48]),
        'W_fc': self.weight_variable(
            [int(math.ceil(n_slices / 4) * (math.ceil(img_sz / 4) *
                 math.ceil(img_sz / 4) * 80)), 512]),
        'W_fc2': self.weight_variable([512, 256]),
        'out': self.weight_variable([256, 2])
    }
    biases = {
        'b_conv1': self.bias_variable([32]),
        'b_conv2': self.bias_variable([48]),
        'b_fc': self.bias_variable([512]),
        'b_fc2': self.bias_variable([256]),
        'out': self.bias_variable([2])
    }
    self.x_im = tf.reshape(x, shape=[-1, n_slices, img_sz, img_sz, 1])
    conv1 = tf.tanh(self.conv3d(self.x_im, weights['W_conv1']) + biases['b_conv1'])
    conv1 = self.maxpool3d(conv1)
    conv2 = tf.tanh(self.conv3d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = self.maxpool3d(conv2)
    fc = tf.reshape(conv4, [-1, int(math.ceil(n_slices / 8) * math.ceil(img_sz / 8) *
                                    math.ceil(img_sz / 8)) * 80])
    fc = tf.tanh(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.tanh(tf.matmul(fc, weights['W_fc2']) + biases['b_fc2'])
    fc = tf.nn.dropout(fc, self.keep_prob)
    output = tf.matmul(fc, weights['out']) + biases['out']
    return output
where x is a tf.placeholder.
Also note that batch_img and batch_label are numpy arrays.
And:
def weight_variable(self, shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(self, shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
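The conv3d and maxpool3d helpers are not shown in the question; a minimal sketch of typical TF 1.x definitions, assuming stride-1 'SAME' convolutions and 2x2x2 max pooling with stride 2 (which would match the /4 and /8 size reductions above), would be:

# Hypothetical helpers (not in the question); stride-1 'SAME' 3D convolution
# and 2x2x2 max pooling with stride 2, halving each spatial dimension.
def conv3d(self, x, W):
    return tf.nn.conv3d(x, W, strides=[1, 1, 1, 1, 1], padding='SAME')

def maxpool3d(self, x):
    return tf.nn.max_pool3d(x, ksize=[1, 2, 2, 2, 1],
                            strides=[1, 2, 2, 2, 1], padding='SAME')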
The training procedure:
def train_neural_network(self, data_img, labels, batch_size, img_sz, n_slices,
                         last_batch, keep_rate, model_path):
    self.prediction = self.convolutional_neural_network(self.x, img_sz, n_slices)
    self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=self.y_, logits=self.prediction))
    optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.cost)
    correct_prediction = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(self.y_, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    hm_epochs = 1000
    saver = tf.train.Saver(tf.trainable_variables())
    n_epoch = 0
    learning_rate = 1e-4
    model_path_train = 'model_train_3/my_model.ckpt'
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if model_path:
            saver.restore(sess, model_path_train)
        while n_epoch < hm_epochs:
            if len(data_img) > last_batch + batch_size:
                with tf.device('/cpu:0'):
                    #batch_img, batch_label, last_batch = self.get_image(
                    #    data_img, labels, last_batch, batch_size, img_sz, n_slices)
                    batch_img, batch_label, last_batch = self.wrapper_image(
                        data_img, labels, last_batch, batch_size)
                print "Batch label images: " + str(batch_label)
                batch_label = self.dense_to_one_hot(
                    np.array(batch_label, dtype=np.int), 2).astype(np.float32)
            ####### at the end of EACH EPOCH ###
            n_epoch += 1
            print "n_epoch: " + str(n_epoch)
            _, c = sess.run([optimizer, self.cost], feed_dict={
                self.x: batch_img, self.y_: batch_label,
                self.keep_prob: keep_rate, self.learning_rate: learning_rate})
            c = self.cost.eval(feed_dict={
                self.x: batch_img, self.y_: batch_label,
                self.keep_prob: keep_rate, self.learning_rate: learning_rate})
            print "train cost: " + str(c)
            c = self.cost.eval(feed_dict={
                self.x: batch_img, self.y_: batch_label,
                self.keep_prob: keep_rate, self.learning_rate: learning_rate})
            print "train cost: " + str(c)
            if model_path:
                saver.save(sess, model_path_train)
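The dense_to_one_hot helper is also not shown; a common NumPy implementation (an assumption, matching how it is called above) would be:

# Hypothetical implementation (not shown in the question); converts integer
# class labels to one-hot rows, matching the call dense_to_one_hot(labels, 2).
def dense_to_one_hot(self, labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot[np.arange(num_labels), labels_dense.ravel()] = 1
    return labels_one_hot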
Answer 0 (score: 0):
Usually this happens because variables are not being saved and restored correctly, or, if batch normalization is used, because of the data. The consequence is that the unrestored variables are initialized with random weights on every run, giving different results each time.
As a quick test, insert tf.set_random_seed(1) right after all the imports and see whether that fixes the values across different runs. If it does, my theory is more likely to be valid. Then try printing the mean and standard deviation of each layer's activations, and you will see where the two runs start to diverge.
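A minimal sketch of that debugging suggestion. Note that in the question's code conv1, conv2 and fc are locals inside the model function, so exposing them as attributes (e.g. self.conv1 = conv1) is an assumed modification, and "model" stands for the class instance:

# Fix the graph-level seed right after the imports so variable
# initialization is reproducible across runs.
import numpy as np
import tensorflow as tf
tf.set_random_seed(1)

# Hypothetical inspection loop; assumes the layer tensors were saved as
# attributes so they can be evaluated here with the same feed_dict.
for name, tensor in [('conv1', model.conv1), ('conv2', model.conv2), ('fc', model.fc)]:
    mean_op, var_op = tf.nn.moments(tensor, axes=list(range(len(tensor.get_shape()))))
    mean, var = sess.run([mean_op, var_op], feed_dict=feed_dict)
    print name + " mean: " + str(mean) + " std: " + str(np.sqrt(var))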
Answer 1 (score: 0):
Fixed it. I just realized it was the dropout keep probability: it was set to 0.5, so the model activates a different subnetwork of the model on each evaluation of the cost. If I set keep_prob = 1.0, the output is the same across two consecutive runs. Thanks Peter for the help.
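A minimal sketch of the fix, reusing the feed dictionaries from the question: keep dropout active only during training, and feed a keep probability of 1.0 whenever the cost is evaluated.

# Dropout stays on for the training step only.
train_feed = {self.x: batch_img, self.y_: batch_label,
              self.keep_prob: keep_rate, self.learning_rate: learning_rate}
# With keep_prob = 1.0 the full network is used, so evaluation is deterministic.
eval_feed = {self.x: batch_img, self.y_: batch_label,
             self.keep_prob: 1.0, self.learning_rate: learning_rate}

_, c = sess.run([optimizer, self.cost], feed_dict=train_feed)
validation_cost = self.cost.eval(feed_dict=eval_feed)
print "Validation cost: " + str(validation_cost)  # now identical across calls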