我正在尝试用 Keras 训练一个模型。我的机器有 4 个 GPU 和 1 个 CPU。开始训练时,Keras/TensorFlow 会尝试使用并不存在的 CPU:1 设备。我使用的是 Keras 2.1.6,TensorFlow 是从源代码构建的最新版本。
# Instantiate the template model inside a CPU device scope; the parallel
# wrapper below is created outside of that scope.
with tf.device("/cpu:0"):
    model = Model(inputs=inputs, outputs=x)

# Wrap the template model for data-parallel training on 4 GPUs.
p_model = multi_gpu_model(model, gpus=4)
p_model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# NOTE(review): with multi_gpu_model this is presumably the total batch size
# that gets split across the GPUs - confirm against the Keras documentation.
batch_size = 32
p_model.fit(
    x=train_x,
    y=train_y,
    batch_size=batch_size,
    epochs=50,
    verbose=1,
    validation_data=(test_x, test_y),
    shuffle=True,
)
开始训练时我遇到了以下错误。我不明白为什么它会尝试使用 CPU:1,这个设备根本不存在。
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1321 try:
-> 1322 return fn(*args)
1323 except errors.OpError as e:
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1306 return self._call_tf_sessionrun(
-> 1307 options, feed_dict, fetch_list, target_list, run_metadata)
1308
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1408 self._session, options, feed_dict, fetch_list, target_list,
-> 1409 run_metadata)
1410 else:
InvalidArgumentError: Creating a partition for /device:CPU:1 which doesn't exist in the list of available devices. Available devices: /device:CPU:0,/device:GPU:0,/device:GPU:1,/device:GPU:2,/device:GPU:3
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-19-ad177e58b8dc> in <module>()
19 verbose=1,
20 validation_data=(test_x, test_y),
---> 21 shuffle=True)
/usr/local/lib/python3.5/dist-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1703 initial_epoch=initial_epoch,
1704 steps_per_epoch=steps_per_epoch,
-> 1705 validation_steps=validation_steps)
1706
1707 def evaluate(self, x=None, y=None,
/usr/local/lib/python3.5/dist-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
1234 ins_batch[i] = ins_batch[i].toarray()
1235
-> 1236 outs = f(ins_batch)
1237 if not isinstance(outs, list):
1238 outs = [outs]
/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2480 session = get_session()
2481 updated = session.run(fetches=fetches, feed_dict=feed_dict,
-> 2482 **self.session_kwargs)
2483 return updated[:len(self.outputs)]
2484
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
898 try:
899 result = self._run(None, fetches, feed_dict, options_ptr,
--> 900 run_metadata_ptr)
901 if run_metadata:
902 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1133 if final_fetches or final_targets or (handle and feed_dict_tensor):
1134 results = self._do_run(handle, final_targets, final_fetches,
-> 1135 feed_dict_tensor, options, run_metadata)
1136 else:
1137 results = []
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1314 if handle is None:
1315 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1316 run_metadata)
1317 else:
1318 return self._do_call(_prun_fn, handle, feeds, fetches)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1333 except KeyError:
1334 pass
-> 1335 raise type(e)(node_def, op, message)
1336
1337 def _extend_graph(self):
InvalidArgumentError: Creating a partition for /device:CPU:1 which doesn't exist in the list of available devices. Available devices: /device:CPU:0,/device:GPU:0,/device:GPU:1,/device:GPU:2,/device:GPU:3
答案 0 :(得分:0)
原因是我的后端(TensorFlow)安装有问题。