我刚刚安装了 TensorFlow GPU 版,并开始训练我的卷积神经网络。问题是 GPU 使用率一直在 0% 左右,偶尔才会升到 20%;CPU 使用率约为 20%,磁盘使用率在 60% 以上。为了检查安装是否正确,我做了一些矩阵乘法测试,在这种情况下一切正常,GPU 使用率超过 90%。
with tf.device("/gpu:0"):
#here I set up the computational graph
运行计算图时,我使用下面的会话配置(allow_soft_placement=True),这样如果某个操作没有 GPU 实现,TensorFlow 会自动把它放到 CPU 上执行:
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
我有一块NVIDIA GEFORCE GTX 950m显卡,我在运行时没有收到错误。我做错了什么?
后续编辑:以下是我的计算图
# Build the model graph under an explicit GPU device scope. The session is
# created with allow_soft_placement=True (see `config` below), so ops pinned
# here fall back to the CPU when no GPU kernel is available for them.
with tf.device("/gpu:0"):
    # Input images; height, width and channels are defined outside this snippet.
    X = tf.placeholder(tf.float32, shape=[None, height, width, channels], name="X")
    dropout_rate = 0.3
    # Defaults to False, so dropout is only active when training=True is fed.
    training = tf.placeholder_with_default(False, shape=(), name="training")
    X_drop = tf.layers.dropout(X, dropout_rate, training=training)
    # Integer class labels, one per example.
    y = tf.placeholder(tf.int32, shape=[None], name="y")

    conv1 = tf.layers.conv2d(X_drop, filters=32, kernel_size=3,
                             strides=1, padding="SAME",
                             activation=tf.nn.relu, name="conv1")
    conv2 = tf.layers.conv2d(conv1, filters=64, kernel_size=3,
                             strides=2, padding="SAME",
                             activation=tf.nn.relu, name="conv2")
    pool3 = tf.nn.max_pool(conv2,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding="VALID")
    conv4 = tf.layers.conv2d(pool3, filters=128, kernel_size=4,
                             strides=3, padding="SAME",
                             activation=tf.nn.relu, name="conv4")
    pool5 = tf.nn.max_pool(conv4,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 1, 1, 1],
                           padding="VALID")

    # NOTE(review): assumes pool5 is 2x2x128 per example for the chosen
    # height/width -- confirm against the actual input size.
    pool5_flat = tf.reshape(pool5, shape=[-1, 128 * 2 * 2])
    fullyconn1 = tf.layers.dense(pool5_flat, 128, activation=tf.nn.relu, name="fc1")
    fullyconn2 = tf.layers.dense(fullyconn1, 64, activation=tf.nn.relu, name="fc2")
    # Two output logits, i.e. a two-class classifier.
    logits = tf.layers.dense(fullyconn2, 2, name="output")

    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

    # Fraction of examples whose top-1 prediction matches the label.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

hm_epochs = 100

# Soft placement lets TensorFlow move ops without a GPU kernel to the CPU;
# allow_growth makes the GPU memory allocation grow on demand.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
批量大小(batch_size)为 128。
# Train for hm_epochs epochs, report test accuracy after each epoch, and save
# the final model. Input data is still fetched with sess.run() and fed back
# through feed_dict, exactly as in the original question.
with tf.Session(config=config) as sess:
    tbWriter = tf.summary.FileWriter(logPath, sess.graph)

    # Training pipeline: decode/resize every sample, then batch.
    dataset = tf.data.Dataset.from_tensor_slices((training_images, training_labels))
    dataset = dataset.map(rd.decodeAndResize)
    dataset = dataset.batch(batch_size)

    # Test pipeline: one single batch that holds the whole test set.
    testset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    testset = testset.map(rd.decodeAndResize)
    testset = testset.batch(len(test_images))

    iterator = dataset.make_initializable_iterator()
    test_iterator = testset.make_initializable_iterator()

    # Create both get_next() ops once, before the loop: calling get_next()
    # inside the epoch loop would add a new op to the graph on every epoch.
    next_element = iterator.get_next()
    next_test = test_iterator.get_next()

    sess.run(tf.global_variables_initializer())

    for epoch in range(hm_epochs):
        # Rewind the training iterator and consume it until it is exhausted.
        sess.run(iterator.initializer)
        while True:
            try:
                epoch_x, epoch_y = sess.run(next_element)
                sess.run(training_op,
                         feed_dict={X: epoch_x, y: epoch_y, training: True})
            except tf.errors.OutOfRangeError:
                break

        # Evaluate on the test set (dropout stays off: `training` defaults
        # to False).
        sess.run(test_iterator.initializer)
        try:
            # Distinct names so the source arrays test_images/test_labels
            # are not overwritten by the decoded batch.
            test_x, test_y = sess.run(next_test)
            acc_test = accuracy.eval(feed_dict={X: test_x, y: test_y})
            # This value is computed on the test set, so label it as such.
            print("Epoch {0}: Test accuracy {1}".format(epoch, acc_test))
        except tf.errors.OutOfRangeError:
            break

    save_path = saver.save(sess, "./my_first_model")
    # Flush pending events (including the graph) to disk and release the file.
    tbWriter.close()
我有 9k 张训练图片和 3k 张测试图片。
答案 0 :(得分:1)
您的代码中存在一些可能导致GPU使用率较低的问题。
1)在 `Dataset` 管道的末尾添加 `prefetch` 操作,使 CPU 能够提前准备并缓冲输入数据批次,以便随时将其送入 GPU。
# Keep prefetch as the very last step of the input pipeline so a batch can be
# prepared ahead of time while the previous one is being consumed.
dataset = dataset.prefetch(1)
2)您在使用 `Dataset` 迭代器的同时,又通过 `feed_dict` 向模型提供数据。这不是预期的用法!`feed_dict` 是向模型输入数据最慢的方式,不推荐使用(feed_dict is the slowest method of inputting data to your model and not recommended)。您应该直接基于迭代器的 `next_element` 输出来定义模型。
示例:
# Take the model inputs straight from the Dataset iterator instead of from
# placeholders, so no input data has to go through feed_dict.
next_x, next_y = iterator.get_next()

with tf.device('/GPU:0'):
    conv1 = tf.layers.conv2d(next_x, filters=32, kernel_size=3,
                             strides=1, padding="SAME",
                             activation=tf.nn.relu, name="conv1")

    # rest of model here...

    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=next_y)
然后你就可以在不通过 `feed_dict` 提供输入数据的情况下调用训练操作,迭代器会在幕后负责向模型提供数据。另请参阅另一个相关问答(Here is another related Q&A)。新的训练循环看起来像这样:
# Run training steps until the iterator is exhausted; only the lightweight
# `training` flag is passed through feed_dict.
while True:
    try:
        sess.run(training_op, feed_dict={training: True})
    except tf.errors.OutOfRangeError:
        break
只有迭代器不提供的数据才应通过 `feed_dict` 输入,而且这类数据通常应当非常轻量。
有关性能的更多建议,请参阅 TensorFlow 官网上的性能指南(this guide on TF website)。
答案 1 :(得分:0)
您可以尝试以下代码来查看tensorflow是否识别您的GPU:
# Print every device TensorFlow can see; a GPU entry should appear in the
# output if TensorFlow recognizes the GPU.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())