I am trying to train a CNN on CIFAR-100 with TensorFlow in Python, but the errors CUDA_ERROR_OUT_OF_MEMORY, CUDNN_STATUS_NOT_INITIALIZED and CUDNN_STATUS_BAD_PARAM keep getting in my way. I am running TensorFlow in an Anaconda virtual environment on my machine; the Python version is Anaconda Python 3.5 (inside the virtual environment) and the TensorFlow version is 1.1.0. Here is my code:
tf_cifar_learning.py:
# Set working directory
import os
dir_model = "c:/tf_model_cifar100"
# Modules needed
import numpy as np
import tensorflow as tf
import pandas as pd
from mlxtend.preprocessing import one_hot
# Load CIFAR Data
from batch import next_batch
from read import unpickle
import time
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.995)
# Prepare test data
testdata = unpickle('test')[b'data']
testdata1 = testdata.astype('float')
del testdata
testdata = testdata1[0:5000, :]
testlabel = unpickle('test')[b'coarse_labels'][0:5000]
testlabel = one_hot(testlabel, 100)
for i in range(testdata.shape[0]):
    for j in range(3072):
        testdata[i][j] = float(testdata[i][j]) / 255.0
    if(i % 1000 == 0):
        print("%d of 5000 test datasets processed" % i)
# Parameters
learning_rate = 0.001
training_iters = 1000000
batch_size = 10 # 128
display_step = 2
# Network Parameters
n_input = 1024*3 # CIFAR data input (img shape: 32*32*3)
n_classes = 100 # CIFAR total classes
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    o = tf.nn.relu(x)
    return o

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    o = tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
    return o
# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, 32, 32, 3])
    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling)
    conv1 = maxpool2d(conv1, k=2)
    # Convolution Layer
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling)
    conv2 = maxpool2d(conv2, k=2)
    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)
    # Output, class prediction
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out
# Store layers weight & bias
weights = {
    # 5x5 conv, 3 input channels, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # fully connected, 8*8*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([8*8*64, 1024])),
    # 1024 inputs, 100 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_classes]))
}
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = conv_net(x, weights, biases, keep_prob)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    step = 1
    # Time measuring
    t1 = time.time()
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        # Prepare training batch
        batch_x, batch_y = next_batch(batch_size)
        batch_x1 = np.zeros([batch_size, 3072], dtype="float32")
        for i in range(batch_size):
            for j in range(3072):
                batch_x1[i][j] = batch_x[i][j] / 255.0
            #if(i % 200 == 0):
            #    print("%d of %d training batch images processed" % (i, batch_size))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x1, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x1, y: batch_y, keep_prob: 1.})
            # Calculate accuracy for all test samples
            acc = accuracy.eval({x: testdata, y: testlabel, keep_prob: 1.})
            # Time measuring
            t2 = time.time()
            tmp = t2-t1
            sec = tmp % 60
            m = int(tmp / 60)
            print("Iter# %8d"%(step*batch_size) + \
                  ", Minibatch Loss= %16.10f"%(loss) + \
                  ", Testing Accuracy= %8.6f"%(acc) + \
                  ", Training currently elapsed " + \
                  "{:d} mins {:f} secs".format(m, sec))
        step += 1
    print("Optimization Finished!")
    # Save the model after learning
    model_saver = tf.train.Saver()
    model_saver.save(sess, dir_model + "/CIFAR-100_cnn_model.chkp")
batch.py:
def next_batch(batch_size, onehot=True):
    class a:
        try:
            temp = current_batch
        except NameError:
            current_batch = 0
    import numpy as np
    from read import unpickle
    import tensorflow as tf
    #from mlxtend.preprocessing import one_hot
    dict_data = unpickle('train')
    label = np.array(dict_data[b'fine_labels'][a.current_batch:a.current_batch+batch_size])
    a1 = dict_data[b'data']
    a2 = a1[a.current_batch:a.current_batch+batch_size, :]
    a.current_batch += batch_size
    a2 = np.reshape(a2, (batch_size, 3072))
    with tf.device('/cpu:0'):
        if(onehot==True):
            label = tf.Session().run(tf.one_hot(label, 100))
    return a2,label
read.py:
def unpickle(file):
    import pickle
    with open(file, 'rb') as a:
        dict = pickle.load(a, encoding='bytes')
    return dict
Output of running python tf_cifar_learning.py from the Windows CMD prompt:
(tensorflow) C:\Users\Administrator\learn_tensorflow\cifar-100-python>python tf_cifar_learning.py
0 of 5000 test datasets processed
1000 of 5000 test datasets processed
2000 of 5000 test datasets processed
3000 of 5000 test datasets processed
4000 of 5000 test datasets processed
2017-05-02 17:48:46.635855: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.635975: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.637256: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE3 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.638434: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.638939: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.639456: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.641753: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.641909: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.994154: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:887] Found device 0 with properties:
name: GeForce GT 730
major: 3 minor: 5 memoryClockRate (GHz) 0.9015
pciBusID 0000:01:00.0
Total memory: 2.00GiB
Free memory: 1.66GiB
2017-05-02 17:48:46.994318: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:908] DMA: 0
2017-05-02 17:48:46.997080: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:918] 0: Y
2017-05-02 17:48:46.997985: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GT 730, pci bus id: 0000:01:00.0)
2017-05-02 17:48:46.999359: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_driver.cc:893] failed to allocate 1.99G (2136745984 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY
2017-05-02 17:48:46.999434: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_driver.cc:893] failed to allocate 1.79G (1923071488 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY
2017-05-02 17:48:47.766766: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GT 730, pci bus id: 0000:01:00.0)
2017-05-02 17:48:48.334298: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:359] could not create cudnn handle: CUDNN_STATUS_NOT_INITIALIZED
2017-05-02 17:48:48.334466: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:366] error retrieving driver version: Unimplemented: kernel reported driver version not implemented on Windows
2017-05-02 17:48:48.343454: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:326] could not destroy cudnn handle: CUDNN_STATUS_BAD_PARAM
2017-05-02 17:48:48.343558: F c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\kernels\conv_ops.cc:659] Check failed: stream->parent()->GetConvolveAlgorithms(&algorithms)
(tensorflow) C:\Users\Administrator\learn_tensorflow\cifar-100-python>
Then Windows 10 reports that Python has stopped working and kills it immediately. Can somebody tell me what the problem is and show me (or maybe give me an example of) how to fix it?
Answer 0 (score: 0)
The problem is most likely related to your environment.
You have only one GPU, and it is probably also driving your display. That is why TensorFlow cannot allocate the memory it wants up front. You can control how much GPU memory is used with per_process_gpu_memory_fraction, as shown here:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/test_util.py#L388
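For example, here is a minimal sketch of the session setup only (not the full training script) that caps TensorFlow at roughly 60% of GPU memory so the display driver keeps some headroom; the 0.6 value is just an illustration:

import tensorflow as tf

# TF 1.x: let this process claim only ~60% of GPU memory instead of nearly all of it
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
config = tf.ConfigProto(gpu_options=gpu_options)
with tf.Session(config=config) as sess:
    # build and run the graph as usual
    pass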
Regarding cuDNN, it looks like the cuDNN library cannot initialize itself ("CUDNN_STATUS_NOT_INITIALIZED"). Are you sure you can run other CUDA and cuDNN samples in that environment?
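One quick way to check that, as a minimal sketch independent of the question's script, is to run a tiny convolution on the GPU (this exercises cuDNN) and log where each op was placed:

import numpy as np
import tensorflow as tf

with tf.device('/gpu:0'):
    inp = tf.constant(np.random.rand(1, 8, 8, 3).astype('float32'))
    filt = tf.constant(np.random.rand(3, 3, 3, 4).astype('float32'))
    conv = tf.nn.conv2d(inp, filt, strides=[1, 1, 1, 1], padding='SAME')

# log_device_placement shows whether the conv really ran on the GPU
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    print(sess.run(conv).shape)  # (1, 8, 8, 4) if CUDA/cuDNN are working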
Answer 1 (score: 0)
Try changing per_process_gpu_memory_fraction=0.995 to 0.7 or 0.6.
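In the question's script that is a one-line change (the exact value is a suggestion, not a hard rule):

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)  # was 0.995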
Answer 2 (score: 0)
Now I know what was going on. It really was an OOM. Restarting the machine and reducing the batch size did the job.
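For anyone hitting the same thing with this script: the relevant knob is batch_size near the top of tf_cifar_learning.py, e.g. (value illustrative only):

batch_size = 10  # smaller batches need less GPU memory per training step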
Answer 3 (score: 0)
I got the same error with TensorFlow-gpu == 1.13.1 installed via conda. After several days of struggling, I solved it with the code below:
import tensorflow as tf
from keras import backend as K  # K is the Keras backend (not imported in the original snippet)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)
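allow_growth makes TensorFlow allocate GPU memory on demand instead of reserving almost all of it at startup, which avoids the up-front CUDA_ERROR_OUT_OF_MEMORY when something else (such as the display) is already using part of the card.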
I think the problem was caused by the display being plugged into the graphics card.