此MultinomialNB实例尚未安装。使用此方法之前,请使用适当的参数调用“ fit”

时间:2018-09-23 08:51:17

标签: python scikit-learn

我使用partial_fit()进行增量训练,代码为:

from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn import metrics

import pandas as pd
import numpy as np
import os
import csv
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


def iter_minibatches(data_stream, minibatch_size=1024):
    X = []
    y = []
    cur_line_num = 0

    csvfile = open(data_stream,mode='r')
    reader = csv.reader(csvfile)
    index = 0
    for line in reader:
        if index == 0:
            continue
        index +=1
        y.append(float(line[-1]))
        X.append(line[:len(line)-1])  # 这里要将数据转化成float类型

        cur_line_num += 1
        if cur_line_num >= minibatch_size:
            X, y = np.array(X), np.array(y)  # 将数据转成numpy的array类型并返回
            X = X.astype('float32')
            y = y.astype('float32')
            yield X, y
            X, y = [], []
            cur_line_num = 0
    csvfile.close()

if __name__=="__main__":
    ......

    # 生成测试文件
    minibatch_test_iterators = iter_minibatches(test_file_name, minibatch_size=test_batch_size)


    algorithms={'SGDClassifier':SGDClassifier(),  # SGDClassifier的参数设置可以参考sklearn官网
                'Perceptron':Perceptron(),
                # 'PassiveAggressiveClassifier':PassiveAggressiveClassifier(),
                'MultinomialNB':MultinomialNB(),
                'BernoulliNB':BernoulliNB()
    }

    per_batch_scroes = {}

    plt.figure(figsize=(10,10))

    for algo_name in algorithms.keys():

        model = algorithms[algo_name]
        minibatch_train_iterators = iter_minibatches(train_file_name, minibatch_size=train_batch_size)
        per_scroes =[]

        for i, (X_train, y_train) in enumerate(minibatch_train_iterators):
            # 得到一份测试文件
            X_test, y_test = minibatch_test_iterators.__next__()

            # 使用 partial_fit ,并在第一次调用 partial_fit 的时候指定 classes
            model.partial_fit(X_train, y_train, classes=np.array([0, 1]))
            # 当前次数
            print("{} time".format(i))
            # 在测试集上看效果
            y_test.reshape(-1,1)
            curr_score = model.score(X_test, y_test)
            print("{} score".format(curr_score))
            per_scroes.append(curr_score)

        per_batch_scroes[algo_name] = per_scroes

        sk_test_y = test['class']
        sk_test_x = test.drop(['class'],axis=1)
        test_class_preds = model.predict(sk_test_x.values)
        fpr, tpr, thresh = metrics.roc_curve(sk_test_y.values,test_class_preds)
        auc = metrics.roc_auc_score(sk_test_y.values,test_class_preds)
        plt.plot(fpr,tpr,label=algo_name+", auc="+str(float('%.4f'%auc)))

但是我得到了错误:

Traceback (most recent call last):
  File "skOnline_murray.py", line 106, in <module>
    test_class_preds = model.predict(sk_test_x.values)
  File "/usr/local/lib/python3.5/dist-packages/sklearn/naive_bayes.py", line 66, in predict
    jll = self._joint_log_likelihood(X)
  File "/usr/local/lib/python3.5/dist-packages/sklearn/naive_bayes.py", line 722, in _joint_log_likelihood
    check_is_fitted(self, "classes_")
  File "/usr/local/lib/python3.5/dist-packages/sklearn/utils/validation.py", line 768, in check_is_fitted
    raise NotFittedError(msg % {'name': type(estimator).__name__})
sklearn.exceptions.NotFittedError: This MultinomialNB instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.

0 个答案:

没有答案