Cannot use a loaded Keras model with TensorFlow in a multi-threaded environment

Asked: 2019-03-13 12:43:18

Tags: python multithreading tensorflow keras

I have two threads, one in charge of training and another in charge of estimation. I have several entities and I want to keep one model per entity, so I load and save the models "on the fly" (I know this is slow).

This works if I load the model every time I want to call the predict function. However, if I load the model only once and then make several predictions in a row, I get the following exception:

Traceback (most recent call last):
  File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.6/threading.py", line 1182, in run
    self.function(*self.args, **self.kwargs)
  File "/home/arroyadr/Proyectos/iot-ai-engine/src/trainer.py", line 388, in train
    self.update_prediction_historics_all()
  File "/home/arroyadr/Proyectos/iot-ai-engine/src/trainer.py", line 413, in update_prediction_historics_all
    self.update_prediction_historics_dataset(new_dataset, loadModel=True)
  File "/home/arroyadr/Proyectos/iot-ai-engine/src/trainer.py", line 444, in update_prediction_historics_dataset
    loadModel=False)[0]
  File "/home/arroyadr/Proyectos/iot-ai-engine/src/estimator.py", line 207, in get_predictions_sequential
    prediction = model.predict(new_data)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/keras/engine/training.py", line 1169, in predict
    steps=steps)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 294, in predict_loop
    batch_outs = f(ins_batch)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2671, in _call
    session)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2623, in _make_callable
    callable_fn = session._make_callable_from_options(callable_opts)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1431, in _make_callable_from_options
    return BaseSession._Callable(self, callable_options)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1385, in __init__
    session._session, options_ptr, status)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 526, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Tensor lstm_1_input:0, specified in either feed_devices or fetch_devices was not found in the Graph

Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7f1ca2b33748>>
Traceback (most recent call last):
  File "/home/arroyadr/.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1415, in __del__
    self._session._session, self._handle, status)
  File "/home/arroyadr/.local/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 526, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.CancelledError: Session has been closed.

The code I use to load the model and make predictions is the following:

def load_model_file(self, path=None):
    """
    Load the model given in path
    :param path: path of the model. If it is None it loads a default model
    :return:
    """
    lock = Lock()
    lock.acquire()
    model = None
    if (path is not None):
        if (os.path.isfile(path)):
            if (not sklearn):
                model = load_model(path)
                # model = self.pred.get_model([1, self.num_previous_measures, 1, 1], activation=self.activation)
                # model.load_weights(path)
                model._make_predict_function()
                self.graph = tf.get_default_graph()

            # Load scalers
            scalers = []
            for i in range(self.num_features_dataset):
                scaler = joblib.load(
                    '../rsc/datasets/scalers/' + path.split("/")[3].split(".")[0] + str(i) + '.pkl')
                scalers.append(scaler)

    lock.release()

    if (model is None):
        self.logger.error("No model could be found for " + str(path))
        self.model_predict = None
        self.scalers_predict = None
        return None, None
    else:
        self.model_predict = model
        self.scalers_predict = scalers
        return model, scalers


def get_predictions_sequential(self, data, num_pred, column_data, path=None, loadModel=True):
    """
    Predicts a list of values from the data given as param.
    :param data: data (time series) from which predict the next value
    :param num_pred: number of predictions
    :param path: path where to read the model
    :return: list of predictions
    """

    # Load model, if there is no model, then it will try to train and set the scaler
    if (loadModel):
        # with filelock.FileLock(path + ".lock"):
        model, scalers = self.load_model_file(path)
    else:
        model = self.model_predict
        scalers = self.scalers_predict
        # model._make_predict_function()

    # Scale prediction data
    data = np.reshape(np.array(data), (1, self.num_previous_measures, self.num_features_dataset))
    for i in range(self.num_features_dataset):
        data2 = data[:, :, i].copy().reshape(1, self.num_previous_measures)
        data2 = np.insert(data2, self.num_previous_measures, data.mean())
        data2 = np.reshape(data2, (1, self.num_previous_measures + 1))
        data2 = scalers[i].transform(data2)
        data[:, :, i] = data2[0][:-1]

    predictions = []
    new_data = data.copy()

    for i in range(num_pred):
        if (not sklearn):
            with self.graph.as_default():
            #     with tf.Session(graph=self.graph) as sess:
            # sess = tf.Session()
            # K.set_session(sess)
                prediction = model.predict(new_data)
            # self.logger.info("Pred not scaled: "+ str(prediction[0]))
            prediction_rescaled = self.invert_scale(scalers[column_data - 1],
                                                    new_data[0, :, column_data - 1],
                                                    prediction[0][0])

    return predictions

I have read and followed this issue, but I have not found any suitable solution. Has anyone run into this problem?

1 Answer:

Answer 0 (score: 0)

After some investigation and more trial and error, I found the solution.

Regarding the "Session has been closed" error, answer 1 to this question may be useful:

> K.clear_session() is useful when you are creating several models in succession, such as during hyperparameter search or cross-validation. Each model you train adds nodes (potentially numbering in the thousands) to the graph. TensorFlow executes the whole graph whenever you (or Keras) call tf.Session.run() or tf.Tensor.eval(), so your models will become slower and slower to train, and you may also run out of memory. Clearing the session removes all the nodes left over from previous models, freeing memory and preventing the slowdown.

So what I did was add a call to K.clear_session() before loading a new model; this way we avoid loading several models onto the same graph.
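In isolation the pattern is just a few lines. This is a minimal sketch (with the imports that the function below assumes; `path` is only a placeholder for a model file):

import tensorflow as tf
from keras import backend as K
from keras.models import load_model

K.clear_session()                 # drop nodes left over from previously loaded models
model = load_model(path)          # the model is now built on a fresh graph
model._make_predict_function()    # build the predict function before other threads use the model
graph = tf.get_default_graph()    # keep the graph so predict() can later run under it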

On the other hand, I did not have to call

with self.graph.as_default():
   with tf.Session(graph=self.graph) as sess:
       backend.set_session(sess)

So the model-loading function now looks like this:

def load_model_file(self, path=None):
    """
    Load the model given in path
    :param path: path of the model. If it is None it loads a default model
    :return:
    """
    model = None
    if (path is not None):
        if (os.path.isfile(path)):
            if (not sklearn):
                K.clear_session()
                model = load_model(path)
                # model = self.pred.get_model([1, self.num_previous_measures, 1, 1], activation=self.activation)
                # model.load_weights(path)
                model._make_predict_function()
                self.graph = tf.get_default_graph()
            else:
                model = joblib.load(path)
            # Load scalers
            scalers = []
            for i in range(self.num_features_dataset):
                scaler = joblib.load(
                    '../rsc/datasets/scalers/' + path.split("/")[3].split(".")[0] + str(i) + '.pkl')
                scalers.append(scaler)

    if (model is None):
        self.logger.error("No model could be found for " + str(path))
        self.model_predict = None
        self.scalers_predict = None
        return None, None
    else:
        self.model_predict = model
        self.scalers_predict = scalers
        return model, scalers
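With this in place, the prediction thread can load a model once and reuse it for several predictions. A hypothetical usage sketch (the `estimator` instance, model path, and `windows` data are illustrative and not part of the original code):

# `estimator` is an illustrative instance of the class that owns these methods.
model, scalers = estimator.load_model_file('../rsc/models/entity_0.h5')  # illustrative path

for window in windows:  # `windows`: illustrative list of input time series
    predictions = estimator.get_predictions_sequential(window, num_pred=1,
                                                       column_data=1, loadModel=False)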

As for the multi-threading problem and loading different models, keep the following in mind:

Whenever I call get_predictions_sequential from another class or another thread, I must wrap the prediction directly inside get_predictions_sequential with

with self.graph.as_default():
    prediction = model.predict(new_data)

and in that other class I must not call any tf graph-related or session-setting function, because that would mix the graph of that class with the graph of the class that owns get_predictions_sequential.
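As a minimal sketch of that calling pattern (the worker function and variable names are illustrative only):

import threading

def prediction_worker(estimator, data, column_data):
    # No tf.get_default_graph(), K.set_session(), etc. here: the graph handling
    # already happens inside get_predictions_sequential itself.
    return estimator.get_predictions_sequential(data, num_pred=1,
                                                column_data=column_data,
                                                loadModel=False)

worker = threading.Thread(target=prediction_worker, args=(estimator, data, 1))
worker.start()
worker.join()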

Cheers.