I tried to set up GPU support for TensorFlow, and I have to admit I could/should have done a better job of it. Still, here I am, and I'm not sure whether my problem comes from my GPU being old or from an incorrect setup.
Here is what I did:
I have an NVIDIA Quadro K2000D (https://www.techpowerup.com/gpu-specs/quadro-k2000d.c2021). First, I uninstalled all NVIDIA drivers/programs/everything and deleted the NVIDIA program files. Then I installed the 431.02 driver for my graphics card. After that I installed CUDA 10.0, but it wants VS 2017, so I then installed CUDA 10.1, which wants VS 2019, and it did not conflict with the previous installation. Dragged all the cuDNN files into the corresponding directories. Defined the required %PATH% entries. Installed tensorflow-2.0.0 beta in one environment and 1.13.1 in another.
Setup-wise, everything seems fine.
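To double-check that the install is actually picked up, a quick sanity check along these lines should do (a minimal sketch; the exact output differs between 1.13 and the 2.0 beta):

import tensorflow as tf
from tensorflow.python.client import device_lib

# Sanity check: is the Quadro visible to TensorFlow as a GPU device?
print(tf.test.is_gpu_available())                          # expect True
print([d.name for d in device_lib.list_local_devices()])   # expect a '/device:GPU:0' entry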
All I have really done is run a neural network that is based entirely on "Image segmentation with tf.keras" by Raymond Yuan (https://ej.uz/hk9s), with some changes to the input pipeline; the model itself is exactly the same. I can only run it successfully with images of size 64 x 64 and a batch size of 5, and even then not always.
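One thing worth noting is that TensorFlow by default reserves essentially all GPU memory up front; below is a minimal sketch of letting it grow on demand instead, using the TF 1.13 graph-mode API (I have not confirmed this changes anything on the 2 GB card):

import tensorflow as tf

# Sketch: allocate GPU memory incrementally instead of reserving it all at startup.
# (For the 2.0 beta the rough equivalent is tf.config.experimental.set_memory_growth.)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
tf.keras.backend.set_session(tf.Session(config=config))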
The code I am using:
#%%
import os
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import layers
from tensorflow.python.keras import losses
from tensorflow.python.keras import models
#%%
x_train_filenames = []
y_train_filenames = []
x_eval_filenames = []
y_eval_filenames = []
for x in range(500):
    x_train_filenames.append(os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'Train', 'Images', 'gen_{}_.png'.format(x)))
    y_train_filenames.append(os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'Train', 'Labels', 'gen_{}_seg_.png'.format(x)))
    x_eval_filenames.append(os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'Evaluate', 'Images', 'gen_{}_.png'.format(x)))
    y_eval_filenames.append(os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'Evaluate', 'Labels', 'gen_{}_seg_.png'.format(x)))
num_train_examples = len(x_train_filenames)
num_eval_examples = len(x_eval_filenames)
#%%
size = 64
img_shape = (size, size, 1)
batch_size = 50
epochs = 10
threads = 5
def _process_pathnames(img_name, lbl_name):
    img_str = tf.io.read_file(img_name)
    img = tf.image.decode_png(img_str)
    lbl_str = tf.io.read_file(lbl_name)
    lbl = tf.image.decode_png(lbl_str)
    return img, lbl
training_dataset = tf.data.Dataset.from_tensor_slices((x_train_filenames, y_train_filenames))
training_dataset = training_dataset.map(_process_pathnames, num_parallel_calls=threads)
training_dataset = training_dataset.shuffle(num_train_examples)
training_dataset = training_dataset.repeat().batch(batch_size)
evaluation_dataset = tf.data.Dataset.from_tensor_slices(((x_eval_filenames, y_eval_filenames)))
evaluation_dataset = evaluation_dataset.map(_process_pathnames, num_parallel_calls=threads)
evaluation_dataset = evaluation_dataset.shuffle(num_eval_examples)
evaluation_dataset = evaluation_dataset.repeat().batch(batch_size)
#%%
def conv_block(input_tensor, num_filters):
    encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)
    encoder = layers.BatchNormalization()(encoder)
    encoder = layers.Activation('relu')(encoder)
    encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)
    encoder = layers.BatchNormalization()(encoder)
    encoder = layers.Activation('relu')(encoder)
    return encoder

def encoder_block(input_tensor, num_filters):
    encoder = conv_block(input_tensor, num_filters)
    encoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)
    return encoder_pool, encoder

def decoder_block(input_tensor, concat_tensor, num_filters):
    decoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
    decoder = layers.concatenate([concat_tensor, decoder], axis=-1)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    return decoder
inputs = layers.Input(shape=img_shape) # 256
encoder0_pool, encoder0 = encoder_block(inputs, 32) # 128
encoder1_pool, encoder1 = encoder_block(encoder0_pool, 64) # 64
encoder2_pool, encoder2 = encoder_block(encoder1_pool, 128) # 32
encoder3_pool, encoder3 = encoder_block(encoder2_pool, 256) # 16
encoder4_pool, encoder4 = encoder_block(encoder3_pool, 512) # 8
center = conv_block(encoder4_pool, 1024) # center
decoder4 = decoder_block(center, encoder4, 512) # 16
decoder3 = decoder_block(decoder4, encoder3, 256) # 32
decoder2 = decoder_block(decoder3, encoder2, 128) # 64
decoder1 = decoder_block(decoder2, encoder1, 64) # 128
decoder0 = decoder_block(decoder1, encoder0, 32) # 256
outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)
model = models.Model(inputs=[inputs], outputs=[outputs])
#%%
def dice_coeff(y_true, y_pred):
    smooth = 1.
    # Flatten
    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    score = (2. * intersection + smooth) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)
    return score

def dice_loss(y_true, y_pred):
    loss = 1 - dice_coeff(y_true, y_pred)
    return loss

def bce_dice_loss(y_true, y_pred):
    loss = losses.binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
    return loss
model.compile(optimizer='adam', loss=bce_dice_loss, metrics=[dice_loss])
save_model_path = os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'tmp', 'weights.hdf5')
cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path, monitor='val_dice_loss', save_best_only=True, verbose=1)
#%%
history = model.fit(training_dataset,
                    steps_per_epoch=int(np.ceil(num_train_examples / float(batch_size))),
                    epochs=epochs,
                    validation_data=evaluation_dataset,
                    validation_steps=int(np.ceil(num_eval_examples / float(batch_size))),
                    callbacks=[cp])
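For reference, a quick way to pull one batch out of the input pipeline and look at its shapes (a TF 1.13 graph-mode sketch; under the 2.0 beta the dataset can be iterated directly):

# Sketch: inspect a single batch from the training pipeline.
iterator = training_dataset.make_one_shot_iterator()
imgs, lbls = iterator.get_next()
with tf.Session() as sess:
    i, l = sess.run([imgs, lbls])
    print(i.shape, i.dtype, l.shape, l.dtype)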
Most of the time I get this error:
Traceback (most recent call last):
  File "<ipython-input-37-80ce63ea7414>", line 1, in <module>
    runfile('C:/Users/Peteris.Zvejnieks/Data/U-Net_cOpY.py', wdir='C:/Users/Peteris.Zvejnieks/Data')
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)
  File "C:/Users/Peteris.Zvejnieks/Data/U-Net_cOpY.py", line 162, in <module>
    callbacks=[cp])
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\keras\engine\training.py", line 880, in fit
    validation_steps=validation_steps)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py", line 266, in model_iteration
    batch_outs = f(actual_inputs)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\keras\backend.py", line 3076, in __call__
    run_metadata=self.run_metadata)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\client\session.py", line 1439, in __call__
    run_metadata_ptr)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 528, in __exit__
    c_api.TF_GetCode(self.status.status))
ResourceExhaustedError: OOM when allocating tensor with shape[5,32,64,64] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[{{node training/Adam/gradients/zeros_150}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
     [[{{node metrics/dice_loss/div_no_nan}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
And the most recent error message:
An error ocurred while starting the kernel
2019 10:40:25.118724: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX AVX2
2019 10:40:25.248723: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: Quadro K2000D major: 3 minor: 0 memoryClockRate(GHz): 0.954
pciBusID: 0000:01:00.0
totalMemory: 2.00GiB freeMemory: 1.63GiB
2019 10:40:25.248723: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0
2019 10:40:25.638719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019 10:40:25.638719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0
2019 10:40:25.638719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N
2019 10:40:25.638719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1396 MB memory) -> physical GPU (device: 0, name: Quadro K2000D, pci bus id: 0000:01:00.0, compute capability: 3.0)
2019 10:40:40.136574: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 703.40MiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:40.196574: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 641.25MiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:40.576570: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.27GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:40.636569: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.20GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:42.116554: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.44GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:42.246553: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.39GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:46.028515: E tensorflow/stream_executor/cuda/cuda_driver.cc:981] failed to synchronize the stop event: CUDA_ERROR_LAUNCH_TIMEOUT: the launch timed out and was terminated
2019 10:40:46.028515: E tensorflow/stream_executor/cuda/cuda_timer.cc:55] Internal: error destroying CUDA event in context 00000000209996B0: CUDA_ERROR_LAUNCH_TIMEOUT: the launch timed out and was terminated
2019 10:40:46.028515: E tensorflow/stream_executor/cuda/cuda_timer.cc:60] Internal: error destroying CUDA event in context 00000000209996B0: CUDA_ERROR_LAUNCH_TIMEOUT: the launch timed out and was terminated
2019 10:40:46.028515: F tensorflow/stream_executor/cuda/cuda_dnn.cc:194] Check failed: status == CUDNN_STATUS_SUCCESS (7 vs. 0)Failed to set cuDNN stream.