我是 TensorFlow 的新手,刚刚开始使用以 TensorFlow 为后端的 Keras。我使用的笔记本电脑配备 i7 处理器、16 GB 内存和 GeForce GT 750M 显卡。软件版本:TensorFlow 1.10、CUDA Toolkit 9.1、cuDNN 7.14。
我使用下面的代码来训练(微调)预训练的 VGG16 模型。我的训练集有 300 张图像。批大小(batch size)为 8 时,代码可以正常运行;但是,当我把批大小改为 16 后,Spyder 中的内核在训练后无法重新加载,并出现以下错误:
tensorflow/stream_executor/cuda/cuda_driver.cc:1078] failed to synchronize the stop event: CUDA_ERROR_LAUNCH_FAILED
tensorflow/stream_executor/cuda/cuda_timer.cc:55] Internal: error destroying CUDA event in context 0000023F82C1E010: CUDA_ERROR_LAUNCH_FAILED
tensorflow/stream_executor/cuda/cuda_timer.cc:60] Internal: error destroying CUDA event in context 0000023F82C1E010: CUDA_ERROR_LAUNCH_FAILED
tensorflow/stream_executor/cuda/cuda_dnn.cc:211] Check failed: status == CUDNN_STATUS_SUCCESS (7 vs. 0)Failed to set cuDNN stream.
我使用了以下代码:
import argparse
import glob
import os
import sys

import matplotlib.pyplot as plt
import tensorflow as tf
from keras import __version__
from keras.applications import vgg16
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras import backend as K
# Size in pixels that the data generators resize every image to.
IM_WIDTH = 299
IM_HEIGHT = 299
# Number of training epochs handed to train() by the script entry point.
NB_EPOCHS = 2
# Images per batch handed to train(); the author left 32 as an alternative.
BAT_SIZE = 16
# Number of units in the fully connected layer added on top of the base model.
FC_SIZE = 256
def get_nb_files(directory):
    """Count the files stored in the sub-directories of *directory*.

    The tree is walked recursively. Files that sit directly in *directory*
    itself are not counted, and neither are hidden (dot-prefixed) entries,
    because the ``*`` glob pattern does not match them.

    Args:
        directory: Root directory to search.

    Returns:
        The number of regular files found, or 0 when *directory* does not
        exist.
    """
    if not os.path.exists(directory):
        return 0
    cnt = 0
    for root, dirs, _files in os.walk(directory):
        for dr in dirs:
            entries = glob.glob(os.path.join(root, dr, "*"))
            # Count regular files only: a nested sub-directory is not a
            # sample, and its own contents are counted when the walk
            # reaches it.
            cnt += sum(1 for entry in entries if os.path.isfile(entry))
    return cnt
#********************************************************
def add_new_last_layer(base_model, nb_classes):
    """Attach a new classification head to a convnet that has no top.

    The head is global average pooling, then a ReLU dense layer with
    ``FC_SIZE`` units, then a softmax dense layer with ``nb_classes`` units.

    Args:
        base_model: Keras model excluding its top layers.
        nb_classes: Number of classes to predict.

    Returns:
        A new Keras model that maps ``base_model.input`` to the softmax
        predictions.
    """
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(FC_SIZE, activation='relu')(x)
    predictions = Dense(nb_classes, activation='softmax')(x)  # new softmax layer
    # Keras 2 names these keyword arguments `inputs`/`outputs`; the singular
    # Keras 1 spellings are deprecated.
    return Model(inputs=base_model.input, outputs=predictions)
#*************************************************
def train(trndir, valdir, nbepoch, batchsize, plot):
    """Use transfer learning to train a new head on a pretrained VGG16.

    The ImageNet-pretrained VGG16 base (without its top) gets a new
    classification head, the base layers are frozen, and the model is fitted
    from directory-backed image generators. The result is saved to
    ``vgg-ft.model`` in the current working directory.

    Args:
        trndir: Training image directory; each entry directly below it is
            counted as one class.
        valdir: Validation image directory.
        nbepoch: Number of epochs (converted with ``int()``).
        batchsize: Images per batch (converted with ``int()``).
        plot: Accepted for interface compatibility; not used.

    Returns:
        None.
    """
    nb_train_samples = get_nb_files(trndir)
    nb_classes = len(glob.glob(trndir + "/*"))
    nb_val_samples = get_nb_files(valdir)
    nb_epoch = int(nbepoch)
    batch_size = int(batchsize)

    # data prep
    augmentation = dict(
        preprocessing_function=preprocess_input,
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
    )
    train_datagen = ImageDataGenerator(**augmentation)
    # NOTE(review): validation images get the same random augmentation as
    # the training images, so validation metrics are noisy. Kept as in the
    # original; consider using only `preprocessing_function` here.
    test_datagen = ImageDataGenerator(**augmentation)

    train_generator = train_datagen.flow_from_directory(
        trndir,
        target_size=(IM_WIDTH, IM_HEIGHT),
        batch_size=batch_size,
    )
    validation_generator = test_datagen.flow_from_directory(
        valdir,
        target_size=(IM_WIDTH, IM_HEIGHT),
        batch_size=batch_size,
    )

    # setup model
    base_model = vgg16.VGG16(weights='imagenet', include_top=False)
    model = add_new_last_layer(base_model, nb_classes)
    print("setup_to_transfer_learn")
    setup_to_transfer_learn(model, base_model)

    # Keras 2 counts batches per epoch, not samples, so convert the sample
    # counts with a ceiling division (the last batch may be partial).
    steps_per_epoch = (nb_train_samples + batch_size - 1) // batch_size
    validation_steps = (nb_val_samples + batch_size - 1) // batch_size

    print(" model.fit_generator")
    model.fit_generator(
        train_generator,
        epochs=nb_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        # NOTE(review): Keras 2 documents `class_weight` as a dict; the
        # string 'auto' is presumably ignored. Kept as in the original;
        # confirm before relying on class balancing.
        class_weight='auto')
    model.save("vgg-ft.model")
def setup_to_transfer_learn(model, base_model):
    """Mark every layer of *base_model* non-trainable, then compile *model*.

    Args:
        model: Model to compile (RMSprop optimizer, categorical
            cross-entropy loss, accuracy metric).
        base_model: Model whose layers are frozen.
    """
    for frozen_layer in base_model.layers:
        frozen_layer.trainable = False
    # Compile after freezing so the trainable flags set above are in place.
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
if __name__ == "__main__":
    # Dataset root; the same directory is used below for both training and
    # validation.
    Imgdir = "D:/NHQ/coursea_computervision/TensorFlowGPU/training_set - Copy"

    # Clear the Keras backend session before building the model.
    K.clear_session()

    # Request placement on the first GPU for everything built in this scope.
    with tf.device('/gpu:0'):
        cnt = get_nb_files(Imgdir)
        print("cnt:", cnt)
        trndir = Imgdir
        valdir = Imgdir
        train(trndir, valdir, nbepoch=NB_EPOCHS, batchsize=BAT_SIZE, plot=True)
答案 0 :(得分:0)
如果使用 GPU,请尝试把下面的代码放在脚本的开头。不过,这不一定能解决问题,因为这类错误通常是由显存不足引起的。
from keras import backend as K
# Build a TensorFlow session config with GPU memory growth enabled, so
# memory is allocated on demand rather than reserved up front.
session_config = K.tf.ConfigProto()
session_config.gpu_options.allow_growth = True
# Register a session created from that config as the one Keras uses.
growth_session = K.tf.Session(config=session_config)
K.set_session(growth_session)