当我尝试使用以下代码从 pickle 文件加载 sklearn 逻辑回归(LogisticRegression)模型时:
def _feed(gpu):
    """Worker loop: load the pickled model, then predict on queued items.

    Items are taken from ``pending_q`` one at a time. A falsy item stops the
    loop. For every other item ``X``:

    * ``X[0]`` is compared against ``WINDOW_SIZE`` and logged as the frame
      number; inference only runs once it exceeds ``WINDOW_SIZE``.
    * ``X[-1]`` is passed to ``model.predict``.
    * ``X[:2]`` is combined with the prediction (or with ``[-1]`` while
      warming up) and the result is put on ``predicted_q``.

    Args:
        gpu: Not used by this function; kept for interface compatibility.
    """
    log_info("prediction process started")
    # Use a context manager so the model file is closed deterministically
    # instead of leaking the handle until garbage collection.
    # NOTE(review): unpickling triggers the import of sklearn/scipy from
    # inside this worker thread; presumably importing those packages in the
    # main thread before starting any worker avoids import-time races
    # ("dictionary changed size during iteration") -- confirm.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)
    while True:
        X = pending_q.get()
        if not X:
            # A falsy item is the shutdown signal for this worker.
            log_info("stop pending thread")
            return
        # first_elem_non_neg_one_count = (X[-1][0] == -1).sum()
        # log_info("non negative one inside X: %d" % first_elem_non_neg_one_count)
        if X[0] > WINDOW_SIZE:  # only begin prediction when we've got enough frames to compute means and stds
            predicted = model.predict(X[-1])
            # TODO: count consecutive and recent X # only
            # sum_count = (predicted.flatten()[MOST_X_FRAME:] >= THRESHOLD).sum()
            # log_info("***** got %d frames > %.1f in most recent %d frames" % (sum_count, THRESHOLD, abs(MOST_X_FRAME)))
            log_info("frame: %d, predicted: %d" % (X[0], predicted))
            # NOTE(review): if `predicted` is a NumPy array, `list + array`
            # is an element-wise addition rather than a concatenation --
            # confirm this is the intended result.
            predicted = X[:2] + predicted
        else:
            log_info("warming up, skip inference...")
            predicted = X[:2] + [-1]
        predicted_q.put(predicted)  # TODO: batch prediction
我遇到以下错误:
Exception in thread Thread-6:
Traceback (most recent call last):
File "/home/support/.pyenv/versions/3.6.6/lib/python3.6/threading.py", line 916, in _bootstrap_inner
self.run()
File "/home/support/.pyenv/versions/3.6.6/lib/python3.6/threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "/app/xxxxx/04_predict_ps.py", line 106, in _feed
model = pickle.load(model_file)
File "/home/support/.pyenv/versions/va-worker-3.6.6/lib/python3.6/site-packages/sklearn/linear_model/__init__.py", line 12, in <module>
from .base import LinearRegression
File "/home/support/.pyenv/versions/va-worker-3.6.6/lib/python3.6/site-packages/sklearn/linear_model/base.py", line 38, in <module>
from ..preprocessing.data import normalize as f_normalize
File "/home/support/.pyenv/versions/va-worker-3.6.6/lib/python3.6/site-packages/sklearn/preprocessing/__init__.py", line 8, in <module>
from .data import Binarizer
File "/home/support/.pyenv/versions/va-worker-3.6.6/lib/python3.6/site-packages/sklearn/preprocessing/data.py", line 19, in <module>
from scipy import stats
File "/home/support/.pyenv/versions/va-worker-3.6.6/lib/python3.6/site-packages/scipy/stats/__init__.py", line 345, in <module>
from .stats import *
File "/home/support/.pyenv/versions/va-worker-3.6.6/lib/python3.6/site-packages/scipy/stats/stats.py", line 171, in <module>
from . import distributions
File "/home/support/.pyenv/versions/va-worker-3.6.6/lib/python3.6/site-packages/scipy/stats/distributions.py", line 13, in <module>
from . import _continuous_distns
File "/home/support/.pyenv/versions/va-worker-3.6.6/lib/python3.6/site-packages/scipy/stats/_continuous_distns.py", line 6692, in <module>
pairs = list(globals().items())
RuntimeError: dictionary changed size during iteration
我使用以下内容保存模型:
# Train the classifier; `fit` returns the estimator itself, so the fitted
# model can be bound in a single statement.
sk_logistic_regr = LogisticRegression().fit(x_train, y_train)

# Serialize the fitted model to disk; the context manager closes the file.
with open(SK_MODEL_NAME, 'wb') as model_file:
    pickle.dump(sk_logistic_regr, model_file)
我不确定这是否是因为我终止脚本时没有正确关闭 pickle 文件。有人知道造成这种情况的原因吗?看起来,模型加载代码只在第一次运行时成功。
P.S. `_feed` 函数运行在一个专门用于推断的单独线程上。
与此同时,我改用了 `from joblib import dump, load`,这是 sklearn 文档中建议的做法,以此绕过了该问题。