I am new to TensorFlow. Today, when I ran my training code, I got an error and training would not run. Here are the details:
When I train my data, it reports an error. My training code is here:
# coding=utf-8
from color_1 import read_and_decode, get_batch
import LeNet_5
import os
import tensorflow as tf

batch_size = 16
TRAIN_STEPS = 10000
crop_size = 224
REGULARAZTION_RATE = 0.0001

def train(batch_x, batch_y):
    image_holder = tf.placeholder(tf.float32, [batch_size, 224, 224, 3], name='x-input')
    label_holder = tf.placeholder(tf.float32, [batch_size], name='y-input')
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    y = LeNet_5.inference(image_holder, train, regularizer)
    global_step = tf.Variable(0, trainable=False)

    def loss(logits, labels):
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)
        return tf.add_n(tf.get_collection('losses'), name='total_loss')

    loss = loss(y, label_holder)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
    tf.add_to_collection('train_op', train_op)
    saver = tf.train.Saver(max_to_keep=3)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for i in range(TRAIN_STEPS):
            image_batch, label_batch = sess.run([batch_x, batch_y])
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={image_holder: image_batch,
                                                      label_holder: label_batch})
            if i % 100 == 0:
                format_str = ('After %d steps, loss on training batch is: %g')
                print(format_str % (i, loss_value))
        coord.request_stop()
        coord.join(threads)

def main(argv=None):
    image, label = read_and_decode('train_day_night.tfrecords')
    batch_image, batch_label = get_batch(image, label, batch_size, crop_size)
    train(batch_image, batch_label)

if __name__ == '__main__':
    tf.app.run()
The LeNet_5 module that the training script imports is here:
# -*- coding:utf-8 -*-
import tensorflow as tf

def inference(input_tensor, train, regularizer):
    with tf.variable_scope('layer1-conver1'):
        conv1_weights = tf.get_variable("weight", [5, 5, 3, 32],
                                        initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable("biase", [32], initializer=tf.truncated_normal_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
    with tf.variable_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable("weight", [5, 5, 32, 64],
                                        initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("biase", [64], initializer=tf.truncated_normal_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
    with tf.variable_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool2, [pool_shape[0], nodes])

    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable("weight", [nodes, 512],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable("biases", [512], initializer=tf.truncated_normal_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable("weight", [512, 2],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("biases", [2], initializer=tf.truncated_normal_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases
    return logit
That is the LeNet_5 I use.
The images in the .tfrecords file are 224 * 224 * 3. Now I really do not know why this happens or how to fix it. Can you help me? Thank you very much! If you need any more information, please let me know.
Answer 0 (score: 1)
This means your GPU memory has run out. You probably want to move the input pipeline (get_batch() and read_and_decode()) onto the CPU. You can do that with with tf.device('/cpu:0'):.
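For example, here is a minimal sketch of that change in your main(), assuming (as in your code) that read_and_decode() and get_batch() are the calls that build the decoding, cropping and batching ops:

def main(argv=None):
    # Build the input pipeline ops on the CPU so that decoding and batching
    # (and their prefetch buffers) do not consume GPU memory; the model built
    # inside train() is still placed on the default device (the GPU).
    with tf.device('/cpu:0'):
        image, label = read_and_decode('train_day_night.tfrecords')
        batch_image, batch_label = get_batch(image, label, batch_size, crop_size)
    train(batch_image, batch_label)

Only the calls that create the input ops need to sit inside the with tf.device('/cpu:0'): block; train() can stay outside so the network itself still runs on the GPU.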