我在Windows和Ubuntu上都遇到此问题:
Fitting 10 folds for each of 12 candidates, totalling 120 fits
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
exception calling callback for <Future at 0x7f45139d8580 state=finished raised TerminatedWorkerError>
Traceback (most recent call last):
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/externals/loky/_base.py", line 625, in _invoke_callbacks
callback(self)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 347, in __call__
self.parallel.dispatch_next()
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 780, in dispatch_next
if not self.dispatch_one_batch(self._original_iterator):
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 847, in dispatch_one_batch
self._dispatch(tasks)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 765, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 529, in apply_async
future = self._workers.submit(SafeFunction(func))
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/externals/loky/reusable_executor.py", line 177, in submit
return super(_ReusablePoolExecutor, self).submit(
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 1102, in submit
raise self._flags.broken
joblib.externals.loky.process_executor.TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.
The exit codes of the workers are {SIGABRT(-6)}
Traceback (most recent call last):
File "/home/vhviveiros/GitHub/trabalho_covid/classify.py", line 18, in <module>
cf.validation(batch_size=[32, 16, 24], epochs=[100, 250, 200, 500])
File "/home/vhviveiros/GitHub/trabalho_covid/classifier.py", line 67, in validation
grid_search = grid_search.fit(self.X_train, self.y_train)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/sklearn/utils/validation.py", line 73, in inner_f
return f(**kwargs)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/sklearn/model_selection/_search.py", line 736, in fit
self._run_search(evaluate_candidates)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/sklearn/model_selection/_search.py", line 1188, in _run_search
evaluate_candidates(ParameterGrid(self.param_grid))
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/sklearn/model_selection/_search.py", line 708, in evaluate_candidates
out = parallel(delayed(_fit_and_score)(clone(base_estimator),
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 1042, in __call__
self.retrieve()
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 921, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 540, in wrap_future_result
return future.result(timeout=timeout)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/concurrent/futures/_base.py", line 439, in result
return self.__get_result()
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/concurrent/futures/_base.py", line 388, in __get_result
raise self._exception
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/externals/loky/_base.py", line 625, in _invoke_callbacks
callback(self)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 347, in __call__
self.parallel.dispatch_next()
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 780, in dispatch_next
if not self.dispatch_one_batch(self._original_iterator):
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 847, in dispatch_one_batch
self._dispatch(tasks)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/parallel.py", line 765, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 529, in apply_async
future = self._workers.submit(SafeFunction(func))
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/externals/loky/reusable_executor.py", line 177, in submit
return super(_ReusablePoolExecutor, self).submit(
File "/home/vhviveiros/anaconda3/envs/tf-gpu/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 1102, in submit
raise self._flags.broken
TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.
The exit codes of the workers are {SIGABRT(-6)}
已完成的操作
注意:在Windows上运行此代码时,它会同时占满全部RAM和GPU显存,导致系统卡死。在n_jobs = 1(其他参数不变)的情况下,该进程平均占用2 GB内存。输入文件只是一个524x254的.csv文件。
环境:Conda
硬件
代码示例:
# Example call (the traceback shows it issued as `cf.validation(...)` from classify.py).
# 3 batch sizes x 4 epoch counts = the 12 candidates reported in the log output.
validation(batch_size=[32, 16, 24], epochs=[100, 250, 200, 500])
不同的文件
from keras.wrappers.scikit_learn import KerasClassifier
from models import classifier_model
from sklearn.model_selection import GridSearchCV, train_test_split
import pandas as pd
from sklearn.preprocessing import StandardScaler
import datetime
from utils import check_folder
import tensorflow as tf
from sklearn.metrics import confusion_matrix
def validation(self, cv=10, batch_size=-1, epochs=-1, n_jobs=2):
    """Grid-search batch size and epoch count for the Keras classifier.

    Wraps ``classifier_model`` in a ``KerasClassifier`` and runs a
    cross-validated ``GridSearchCV`` over ``self.X_train`` / ``self.y_train``.
    The scoring metric names are stored on ``self.metrics`` and the search
    is refit on the candidate with the best ``'precision'``.

    Args:
        cv: Forwarded to ``GridSearchCV`` as ``cv``.
        batch_size: Candidate batch sizes. A sequence is used as-is; a single
            positive number is treated as a one-element grid.
        epochs: Candidate epoch counts, handled like ``batch_size``.
        n_jobs: Forwarded to ``GridSearchCV`` as ``n_jobs`` (default 2, the
            previously hard-coded value).
            NOTE(review): every parallel worker is a separate process that
            builds its own model; lower this to 1 if workers get killed.

    Returns:
        The fitted ``GridSearchCV`` instance.

    Raises:
        ValueError: If ``batch_size`` or ``epochs`` is a non-positive scalar
            (including the ``-1`` placeholder defaults).
    """

    def as_candidates(name, value):
        # GridSearchCV rejects non-sequence grid values, so a lone number is
        # wrapped; placeholders such as -1 are reported up front instead of
        # failing later inside the search.
        if isinstance(value, (int, float)):
            if value <= 0:
                raise ValueError(
                    f"{name} must be a positive number or a sequence of "
                    f"candidate values, got {value!r}"
                )
            return [value]
        return value

    parameters = {
        'batch_size': as_candidates('batch_size', batch_size),
        'epochs': as_candidates('epochs', epochs),
        'optimizer': ['adam'],
        'activation': ['relu'],
        'activationOutput': ['sigmoid'],
    }

    classifier = KerasClassifier(build_fn=classifier_model)
    self.metrics = ['accuracy', 'roc_auc', 'precision', 'recall']

    grid_search = GridSearchCV(
        estimator=classifier,
        verbose=2,
        param_grid=parameters,
        n_jobs=n_jobs,
        scoring=self.metrics,
        refit='precision',
        return_train_score=False,
        cv=cv,
    )
    # fit() returns the estimator itself, so the fitted search is handed back.
    return grid_search.fit(self.X_train, self.y_train)
不同的文件
from tensorflow.keras.layers import Dropout, Dense
from tensorflow.keras.metrics import AUC, Precision, Recall
from tensorflow.keras.models import Sequential
def classifier_model(optimizer, activation, activationOutput):
    """Build and compile the fully connected binary classifier.

    The network stacks six hidden ``Dense`` layers of 200 units, each
    followed by ``Dropout(0.2)``, and ends in a single-unit output layer.

    Args:
        optimizer: Optimizer passed to ``compile``.
        activation: Activation used by every hidden layer.
        activationOutput: Activation used by the output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # The first hidden layer also declares the 254-feature input width.
    model.add(Dense(units=200, activation=activation, input_shape=(254,)))
    model.add(Dropout(rate=0.2))

    # Remaining five hidden layers share the same configuration.
    for _ in range(5):
        model.add(Dense(units=200, activation=activation))
        model.add(Dropout(rate=0.2))

    model.add(Dense(units=1, activation=activationOutput))

    tracked_metrics = ['accuracy', AUC(), Precision(), Recall()]
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=tracked_metrics,
    )
    return model
答案 0 :(得分:0)
我通过在模型文件(第三个示例)中添加以下代码解决了这个问题:
# Ask TensorFlow to grow GPU memory on demand rather than reserving it up front,
# then open a TF1-compat session with that configuration.
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True),
)
sess = tf.compat.v1.Session(config=config)