我使用 Keras/TensorFlow 在 Jupyter 笔记本中编写了一段代码。在笔记本电脑上(TensorFlow 运行于 CPU)该代码运行正常;但在家用计算机上(TensorFlow 运行于 GPU)运行相同的代码时,却收到错误消息“Failed to create session”(无法创建会话)。如果可能的话,我希望继续使用 GPU。
我将在下面包括我的代码和错误以及终端的输出。
代码(不包括加载/处理数据的其他部分):
from sklearn.model_selection import StratifiedKFold
# L2 penalty factor handed to l2() for the regularized Dense layers.
l2_reg = 0.4
# Momentum value handed to every BatchNormalization layer.
momentum = 0.99
# Random seed for reproducibility.
# NOTE(review): confirm where this is consumed; the data-loading code is not shown.
seed = 5
def create_model_val(x_train, y_train, x_val, y_val, layers=(20, 20, 4),
                     kernel_init='he_uniform', bias_init='he_uniform',
                     batch_norm=True, dropout=True):
    """Build and compile a fully connected Keras ``Sequential`` model.

    One regularized ``Dense`` layer is added per entry of ``layers``, each
    optionally followed by batch normalization, then the activation named
    by ``params['activation']``, then optional dropout. A final ``Dense``
    layer of ``layers[-1]`` units with ``params['last_activation']`` is
    appended, and the model is compiled with ``params['losses']`` and an
    Adam optimizer using ``params['lr']``.

    Args:
        x_train: Training features; only ``x_train.shape[1]`` is read, to
            set the input dimension of the first layer.
        y_train: Unused; kept so existing callers keep working.
        x_val: Unused; kept so existing callers keep working.
        y_val: Unused; kept so existing callers keep working.
        layers: Sequence with the number of units of each Dense layer.
        kernel_init: Kernel initializer passed to every Dense layer.
        bias_init: Bias initializer passed to every Dense layer.
        batch_norm: When truthy, add BatchNormalization after each
            regularized Dense layer.
        dropout: When truthy, add Dropout(params['dropout']) after each
            activation.

    Returns:
        dict: ``{'model': model}`` holding the compiled model.

    Relies on the module-level names ``params``, ``l2_reg`` and
    ``momentum``.
    """
    model = Sequential()

    # NOTE(review): this loop already emits a layer of size layers[-1]
    # (with batch norm / activation / dropout), and the "last layer" below
    # adds another layer of the same size. That matches the original
    # architecture, but may be an off-by-one -- confirm the intent.
    for index, units in enumerate(layers):
        # Only the first layer needs to be told the input dimension.
        first_layer_kwargs = {'input_dim': x_train.shape[1]} if index == 0 else {}
        model.add(Dense(units,
                        # Keras 2 name for the deprecated `W_regularizer`.
                        kernel_regularizer=l2(l2_reg),
                        kernel_initializer=kernel_init,
                        bias_initializer=bias_init,
                        **first_layer_kwargs))
        if batch_norm:
            model.add(BatchNormalization(axis=-1, momentum=momentum,
                                         center=True))
        model.add(Activation(params['activation']))
        if dropout:
            model.add(Dropout(params['dropout']))

    # Last layer (no weight regularization, as in the original).
    model.add(Dense(layers[-1], activation=params['last_activation'],
                    kernel_initializer=kernel_init,
                    bias_initializer=bias_init))

    model.compile(loss=params['losses'],
                  optimizer=keras.optimizers.Adam(lr=params['lr']),
                  metrics=['accuracy'])

    return {'model': model}
def train_and_evaluate_model(model, x_train, y_train, x_val, y_val):
    """Fit ``model`` on the training split and validate on the held-out split.

    Batch size and epoch count are read from the module-level ``params``
    dict (``params['batch_size']``, ``params['epochs']``). Training runs
    silently (``verbose=0``).

    Args:
        model: A compiled model exposing a Keras-style ``fit`` method.
        x_train: Training features.
        y_train: Training targets.
        x_val: Validation features.
        y_val: Validation targets.

    Returns:
        Whatever ``model.fit`` returns (the caller reads its ``.history``).
    """
    return model.fit(
        x_train, y_train,
        # Keras documents validation_data as a tuple; a list can be
        # misread as "inputs only" by newer tf.keras releases.
        validation_data=(x_val, y_val),
        batch_size=params['batch_size'],
        epochs=params['epochs'],
        verbose=0,
    )
# Stratified k-fold cross-validation: build a fresh model for every fold,
# train it, and record the final training / validation accuracy.
n_folds = 10
total_acc = []  # one [train_acc, val_acc] pair per fold

# Seed the shuffle so the fold assignment is reproducible between runs.
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)

for i, (train_idx, val_idx) in enumerate(skf.split(x_main_normalized, y_main)):
    print("Running Fold", i+1, "/", n_folds)

    x_train_2, x_val_2 = x_main_normalized.iloc[train_idx], x_main_normalized.iloc[val_idx]
    y_train_2, y_val_2 = y_main.iloc[train_idx], y_main.iloc[val_idx]

    # One-hot encode only after the split: the stratified splitter is fed
    # the original (non-encoded) labels above.
    if params['losses'] == 'categorical_crossentropy':
        y_train_2 = to_categorical(y_train_2, num_classes=4)
        y_val_2 = to_categorical(y_val_2, num_classes=4)

    # Drop the reference to the previous fold's model before building a new one.
    # NOTE(review): this only rebinds the Python name; if backend state
    # accumulates across folds, consider keras.backend.clear_session() -- confirm.
    model = None
    model = create_model_val(x_train_2, y_train_2, x_val_2, y_val_2, layers=[20, 20, 4],
                             kernel_init='he_uniform', bias_init='he_uniform',
                             batch_norm=True, dropout=True)

    history = train_and_evaluate_model(model['model'], x_train_2, y_train_2, x_val_2, y_val_2)

    history_dict = history.history
    # Older Keras reports the metric as 'acc', newer releases as 'accuracy'.
    acc_key = 'acc' if 'acc' in history_dict else 'accuracy'
    train_acc = history_dict[acc_key]
    val_acc = history_dict['val_' + acc_key]

    # Scale to percent before rounding so the printed value carries no
    # floating-point artifacts (e.g. 81.23000000000001).
    last_acc = round(train_acc[-1] * 100, 2)
    last_val_acc = round(val_acc[-1] * 100, 2)

    total_acc.append([train_acc[-1], val_acc[-1]])

    print("Last training accuracy: " + str(last_acc)+'%'
          + ", last validation accuracy: " + str(last_val_acc)+'%')
Jupyter笔记本错误消息:
Running Fold 1 / 10
/home/mikedoho/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:19: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(20, input_dim=46, kernel_initializer="he_uniform", bias_initializer="he_uniform", kernel_regularizer=<keras.reg...)`
/home/mikedoho/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:34: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(20, kernel_initializer="he_uniform", bias_initializer="he_uniform", kernel_regularizer=<keras.reg...)`
/home/mikedoho/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:34: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(4, kernel_initializer="he_uniform", bias_initializer="he_uniform", kernel_regularizer=<keras.reg...)`
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-24-db5ca8962dc5> in <module>()
91 batch_norm=True, dropout=True)
92
---> 93 history = train_and_evaluate_model(model['model'],x_train_2, y_train_2, x_val_2, y_val_2)
94
95 history_dict = history.history
<ipython-input-24-db5ca8962dc5> in train_and_evaluate_model(model, x_train, y_train, x_val, y_val)
64 validation_data=[x_val, y_val],
65 batch_size=params['batch_size'],
---> 66 epochs=params['epochs'],verbose=0)
67 return history
68
~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1040 initial_epoch=initial_epoch,
1041 steps_per_epoch=steps_per_epoch,
-> 1042 validation_steps=validation_steps)
1043
1044 def evaluate(self, x=None, y=None,
~/anaconda3/lib/python3.6/site-packages/keras/engine/training_arrays.py in fit_loop(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
197 ins_batch[i] = ins_batch[i].toarray()
198
--> 199 outs = f(ins_batch)
200 if not isinstance(outs, list):
201 outs = [outs]
~/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2651
2652 def __call__(self, inputs):
-> 2653 if hasattr(get_session(), '_make_callable_from_options'):
2654 if py_any(is_sparse(x) for x in self.inputs):
2655 if py_any(is_tensor(x) for x in inputs):
~/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in get_session()
181 config = tf.ConfigProto(intra_op_parallelism_threads=num_thread,
182 allow_soft_placement=True)
--> 183 _SESSION = tf.Session(config=config)
184 session = _SESSION
185 if not _MANUAL_VAR_INIT:
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in __init__(self, target, graph, config)
1561
1562 """
-> 1563 super(Session, self).__init__(target, graph, config=config)
1564 # NOTE(mrry): Create these on first `__enter__` to avoid a reference cycle.
1565 self._default_graph_context_manager = None
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in __init__(self, target, graph, config)
631 if self._created_with_new_api:
632 # pylint: disable=protected-access
--> 633 self._session = tf_session.TF_NewSession(self._graph._c_graph, opts)
634 # pylint: enable=protected-access
635 else:
InternalError: Failed to create session.
终端消息:
Adapting to protocol v5.1 for kernel effcd29a-2f4c-4e0e-8d39-f5993f09f90e
2018-08-15 18:48:40.171924: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2018-08-15 18:48:40.270262: E tensorflow/core/common_runtime/direct_session.cc:158] Internal: failed initializing StreamExecutor for CUDA device ordinal 0: Internal: failed call to cuDevicePrimaryCtxRetain: CUDA_ERROR_OUT_OF_MEMORY; total memory reported: 11718230016
答案 0 :(得分:0)
放在我的代码开头:
from keras import backend as K
# Enable on-demand GPU memory growth and register a session with that
# configuration as the one Keras uses.
cfg = K.tf.ConfigProto(gpu_options=K.tf.GPUOptions(allow_growth=True))
gpu_session = K.tf.Session(config=cfg)
K.set_session(gpu_session)
来源:http://forums.fast.ai/t/tip-clear-tensorflow-gpu-memory/1979/7