I tried to set up GPU support for TensorFlow, and I have to admit I could/should have done a better job of it. Still, here I am, and I'm not sure whether my problem comes from my GPU being old or from an incorrect setup.
Here is what I did:
I have an NVIDIA Quadro K2000D (https://www.techpowerup.com/gpu-specs/quadro-k2000d.c2021). First, I uninstalled all NVIDIA drivers/programs/everything and deleted the NVIDIA program files. Then I installed the 431.02 driver for my graphics card. After that I installed CUDA 10.0, but it wants VS 2017, so I then installed CUDA 10.1, which wants VS 2019, and it did not conflict with the previous installation. Dragged all the cuDNN files into the corresponding directories. Defined the required %PATH% entries. Installed tensorflow-2.0.0 beta in one environment and 1.13.1 in another.
Setup-wise, everything seems fine.
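To double-check that the install is actually picked up, a quick sanity check along these lines should do (a minimal sketch; the exact output differs between 1.13 and the 2.0 beta):

import tensorflow as tf
from tensorflow.python.client import device_lib

# Sanity check: is the Quadro visible to TensorFlow as a GPU device?
print(tf.test.is_gpu_available())                          # expect True
print([d.name for d in device_lib.list_local_devices()])   # expect a '/device:GPU:0' entry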
All I have really done is run a neural network that is based entirely on "Image segmentation with tf.keras" by Raymond Yuan (https://ej.uz/hk9s), with some changes to the input pipeline; the model itself is exactly the same. I can only run it successfully with images of size 64 x 64 and a batch size of 5, and even then not always.
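One thing worth noting is that TensorFlow by default reserves essentially all GPU memory up front; below is a minimal sketch of letting it grow on demand instead, using the TF 1.13 graph-mode API (I have not confirmed this changes anything on the 2 GB card):

import tensorflow as tf

# Sketch: allocate GPU memory incrementally instead of reserving it all at startup.
# (For the 2.0 beta the rough equivalent is tf.config.experimental.set_memory_growth.)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
tf.keras.backend.set_session(tf.Session(config=config))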
The code I am using:
#%%
import os
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import layers
from tensorflow.python.keras import losses
from tensorflow.python.keras import models
#%%
x_train_filenames = []
y_train_filenames = []
x_eval_filenames = []
y_eval_filenames = []
for x in range(500):
    x_train_filenames.append(os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'Train', 'Images', 'gen_{}_.png'.format(x)))
    y_train_filenames.append(os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'Train', 'Labels', 'gen_{}_seg_.png'.format(x)))
    x_eval_filenames.append(os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'Evaluate', 'Images', 'gen_{}_.png'.format(x)))
    y_eval_filenames.append(os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'Evaluate', 'Labels', 'gen_{}_seg_.png'.format(x)))
num_train_examples = len(x_train_filenames)
num_eval_examples = len(x_eval_filenames)
#%%
size = 64
img_shape = (size, size, 1)
batch_size = 50
epochs = 10
threads = 5
def _process_pathnames(img_name, lbl_name):
    img_str = tf.io.read_file(img_name)
    img = tf.image.decode_png(img_str)
    lbl_str = tf.io.read_file(lbl_name)
    lbl = tf.image.decode_png(lbl_str)
    return img, lbl
training_dataset = tf.data.Dataset.from_tensor_slices((x_train_filenames, y_train_filenames))
training_dataset = training_dataset.map(_process_pathnames, num_parallel_calls=threads)
training_dataset = training_dataset.shuffle(num_train_examples)
training_dataset = training_dataset.repeat().batch(batch_size)
evaluation_dataset = tf.data.Dataset.from_tensor_slices(((x_eval_filenames, y_eval_filenames)))
evaluation_dataset = evaluation_dataset.map(_process_pathnames, num_parallel_calls=threads)
evaluation_dataset = evaluation_dataset.shuffle(num_eval_examples)
evaluation_dataset = evaluation_dataset.repeat().batch(batch_size)
#%%
def conv_block(input_tensor, num_filters):
    encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)
    encoder = layers.BatchNormalization()(encoder)
    encoder = layers.Activation('relu')(encoder)
    encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)
    encoder = layers.BatchNormalization()(encoder)
    encoder = layers.Activation('relu')(encoder)
    return encoder

def encoder_block(input_tensor, num_filters):
    encoder = conv_block(input_tensor, num_filters)
    encoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)
    return encoder_pool, encoder

def decoder_block(input_tensor, concat_tensor, num_filters):
    decoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
    decoder = layers.concatenate([concat_tensor, decoder], axis=-1)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
    decoder = layers.BatchNormalization()(decoder)
    decoder = layers.Activation('relu')(decoder)
    return decoder
inputs = layers.Input(shape=img_shape) # 256
encoder0_pool, encoder0 = encoder_block(inputs, 32) # 128
encoder1_pool, encoder1 = encoder_block(encoder0_pool, 64) # 64
encoder2_pool, encoder2 = encoder_block(encoder1_pool, 128) # 32
encoder3_pool, encoder3 = encoder_block(encoder2_pool, 256) # 16
encoder4_pool, encoder4 = encoder_block(encoder3_pool, 512) # 8
center = conv_block(encoder4_pool, 1024) # center
decoder4 = decoder_block(center, encoder4, 512) # 16
decoder3 = decoder_block(decoder4, encoder3, 256) # 32
decoder2 = decoder_block(decoder3, encoder2, 128) # 64
decoder1 = decoder_block(decoder2, encoder1, 64) # 128
decoder0 = decoder_block(decoder1, encoder0, 32) # 256
outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)
model = models.Model(inputs=[inputs], outputs=[outputs])
#%%
def dice_coeff(y_true, y_pred):
    smooth = 1.
    # Flatten
    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    score = (2. * intersection + smooth) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)
    return score

def dice_loss(y_true, y_pred):
    loss = 1 - dice_coeff(y_true, y_pred)
    return loss

def bce_dice_loss(y_true, y_pred):
    loss = losses.binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
    return loss
model.compile(optimizer='adam', loss=bce_dice_loss, metrics=[dice_loss])
save_model_path = os.path.join('C:', os.sep, 'Users', 'Peteris.Zvejnieks', 'Data', 'tmp', 'weights.hdf5')
cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path, monitor='val_dice_loss', save_best_only=True, verbose=1)
#%%
history = model.fit(training_dataset,
                    steps_per_epoch=int(np.ceil(num_train_examples / float(batch_size))),
                    epochs=epochs,
                    validation_data=evaluation_dataset,
                    validation_steps=int(np.ceil(num_eval_examples / float(batch_size))),
                    callbacks=[cp])
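For reference, a quick way to pull one batch out of the input pipeline and look at its shapes (a TF 1.13 graph-mode sketch; under the 2.0 beta the dataset can be iterated directly):

# Sketch: inspect a single batch from the training pipeline.
iterator = training_dataset.make_one_shot_iterator()
imgs, lbls = iterator.get_next()
with tf.Session() as sess:
    i, l = sess.run([imgs, lbls])
    print(i.shape, i.dtype, l.shape, l.dtype)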
Most of the time I get this error:
Traceback (most recent call last):
  File "<ipython-input-37-80ce63ea7414>", line 1, in <module>
    runfile('C:/Users/Peteris.Zvejnieks/Data/U-Net_cOpY.py', wdir='C:/Users/Peteris.Zvejnieks/Data')
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)
  File "C:/Users/Peteris.Zvejnieks/Data/U-Net_cOpY.py", line 162, in <module>
    callbacks=[cp])
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\keras\engine\training.py", line 880, in fit
    validation_steps=validation_steps)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py", line 266, in model_iteration
    batch_outs = f(actual_inputs)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\keras\backend.py", line 3076, in __call__
    run_metadata=self.run_metadata)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\client\session.py", line 1439, in __call__
    run_metadata_ptr)
  File "C:\ProgramData\Anaconda3\envs\tf_build_env\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 528, in __exit__
    c_api.TF_GetCode(self.status.status))
ResourceExhaustedError: OOM when allocating tensor with shape[5,32,64,64] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[{{node training/Adam/gradients/zeros_150}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
     [[{{node metrics/dice_loss/div_no_nan}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
And the most recent error message:
An error ocurred while starting the kernel
2019 10:40:25.118724: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX AVX2
2019 10:40:25.248723: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1433] Found device 0 with properties:
name: Quadro K2000D major: 3 minor: 0 memoryClockRate(GHz): 0.954
pciBusID: 0000:01:00.0
totalMemory: 2.00GiB freeMemory: 1.63GiB
2019 10:40:25.248723: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1512] Adding visible gpu devices: 0
2019 10:40:25.638719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] Device interconnect StreamExecutor with strength 1 edge matrix:
2019 10:40:25.638719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:990] 0
2019 10:40:25.638719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1003] 0: N
2019 10:40:25.638719: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1396 MB memory) -> physical GPU (device: 0, name: Quadro K2000D, pci bus id: 0000:01:00.0, compute capability: 3.0)
2019 10:40:40.136574: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 703.40MiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:40.196574: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 641.25MiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:40.576570: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.27GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:40.636569: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.20GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:42.116554: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.44GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:42.246553: W tensorflow/core/common_runtime/bfc_allocator.cc:211] Allocator (GPU_0_bfc) ran out of memory trying to allocate 2.39GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2019 10:40:46.028515: E tensorflow/stream_executor/cuda/cuda_driver.cc:981] failed to synchronize the stop event: CUDA_ERROR_LAUNCH_TIMEOUT: the launch timed out and was terminated
2019 10:40:46.028515: E tensorflow/stream_executor/cuda/cuda_timer.cc:55] Internal: error destroying CUDA event in context 00000000209996B0: CUDA_ERROR_LAUNCH_TIMEOUT: the launch timed out and was terminated
2019 10:40:46.028515: E tensorflow/stream_executor/cuda/cuda_timer.cc:60] Internal: error destroying CUDA event in context 00000000209996B0: CUDA_ERROR_LAUNCH_TIMEOUT: the launch timed out and was terminated
2019 10:40:46.028515: F tensorflow/stream_executor/cuda/cuda_dnn.cc:194] Check failed: status == CUDNN_STATUS_SUCCESS (7 vs. 0)Failed to set cuDNN stream.