我正在按照这个教程操作,并希望进行超参数优化。但是,rs.best_params_ 输出的不是具体数值,而是被“冻结”(frozen)的分布对象:
best params: {'c1': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f3752d1f9b0>, 'c2': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f3752d1fb38>}
如何将其“解冻”为c1和c2的正常值?
sklearn-crfsuite的大多数功能似乎已过时,我不得不进行一些更改,现在我的完整代码是:
from itertools import chain

import nltk
import scipy.stats
import sklearn
import sklearn_crfsuite
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn_crfsuite import metrics
from sklearn_crfsuite import scorers
%%time
# Base estimator: these settings stay fixed; only c1 and c2 are searched.
# CRF is referenced through its package because only `import sklearn_crfsuite`
# is in scope (a bare `CRF` would raise NameError).
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    max_iterations=100,
    all_possible_transitions=True,
)

# Search space: pass the scipy distributions directly, NOT wrapped in lists.
# A list is read as "these exact candidate values", which is how the frozen
# distribution objects themselves ended up in best_params_. Given a
# distribution, RandomizedSearchCV samples concrete values from it instead.
params_space = {
    'c1': scipy.stats.expon(scale=0.5),
    'c2': scipy.stats.expon(scale=0.05),
}

# Use the same metric for evaluation.
f1_scorer = make_scorer(metrics.flat_f1_score, average='weighted')

# Randomized search: draws n_iter (c1, c2) candidates from the distributions
# above and scores each with 3-fold cross-validation.
rs = RandomizedSearchCV(
    crf,
    params_space,
    cv=3,
    verbose=1,
    n_jobs=-1,
    n_iter=50,
    scoring=f1_scorer,
)
rs.fit(X_train, y_train)

# best_params_ now holds the sampled numeric values for c1 and c2.
print('best params:', rs.best_params_)
print('best CV score:', rs.best_score_)
print('model size: {:0.2f}M'.format(rs.best_estimator_.size_ / 1000000))
# Gather each evaluated candidate's c1, c2 and mean cross-validated score.
# `cv_results_` is a dict of per-candidate arrays (not a list of result
# objects), so it is indexed by key: 'params' holds one {'c1': ..., 'c2': ...}
# dict per candidate and 'mean_test_score' the matching mean CV score.
cv_results = rs.cv_results_
_x = [candidate['c1'] for candidate in cv_results['params']]
_y = [candidate['c2'] for candidate in cv_results['params']]
_c = list(cv_results['mean_test_score'])

# Scatter the candidates on log-log axes, coloured by their score.
fig = plt.figure()
fig.set_size_inches(12, 12)
ax = plt.gca()
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel('C1')
ax.set_ylabel('C2')
ax.set_title("Randomized Hyperparameter Search CV Results (min={:0.3}, max={:0.3})".format(
    min(_c), max(_c)
))

ax.scatter(_x, _y, c=_c, s=60, alpha=0.9, edgecolors=[0, 0, 0])

print("Dark blue => {:0.4}, dark red => {:0.4}".format(min(_c), max(_c)))