使用训练模型进行预测,使用MLPClassifier进行文本分类

时间:2018-04-29 19:15:21

标签: python machine-learning scikit-learn nlp text-classification

我使用MLPCalssifier创建了类NeuralNetworkModel:

@staticmethod
def _init_pipeline(stopwords):
    pipe_line = Pipeline([
        ("vect", CountVectorizer(stop_words=stopwords, lowercase=True)), # bag-of-words
        ("tfidf", TfidfTransformer()), # tf-idf
        ("clf", MLPClassifier(hidden_layer_sizes=(15, 15), activation="relu", solver="sgd", max_iter=3000, tol=1e-6, verbose=True)) # model mlp
    ])

    return pipe_line

然后我使用模型训练我的数据集:

class TextClassificationPredict(object):
    def __init__(self):
        self.test = None

    def save_model(self, filePath):
        outfile = open(filePath, 'wb')
        pickle.dump(obj, outfile)
        outfile.close()

    def get_train_data(self, train_data, test_data):
        df_train = DataFrame(train_data)
        df_test = DataFrame(test_data)

        # init model neuralnetwork
        model = NeuralNetworkModel()
        time_start_train = time.time()
        clf = model.clf.fit(df_train["content"], df_train.category)
        time_stop_train = time.time()
        time_train = time_stop_train - time_start_train
        print("training time:", time_train)  # the time would be round to 3 decimal in seconds
        time_start_predict = time.time()
        predicted = clf.predict(df_test["content"])
        time_stop_predict = time.time()
        time_predict = time_stop_predict - time_start_predict
        print("predict time:", time_predict)


        print("Accuracy", accuracy_score(df_test["category"], predicted))
        # print(f1_score(df_test["category"], predicted, average=None))
        print(clf.predict_proba(df_test["content"]))

我在main中创建了对象:

if __name__ == '__main__':
    train_data = DataLoader(dataPath=Settings.DATA_TRAIN_PATH).get_json()
    test_data = DataLoader(dataPath=Settings.DATA_TEST_PATH).get_json()
    tcp = TextClassificationPredict()
    tcp.get_train_data(train_data, test_data)
    tcp.save_model(filePath='neural_network_model1515full.pk')

最后,我想使用保存在'neural_network_model1515full.pk'中的模型,所以我写道:

    with open('neural_network_model1515full.pk', 'rb') as fin:
        clf = pickle.load(fin)
    clf.predict(input)

以下是此脚本的输出:  AttributeError:'TextClassificationPredict'对象没有属性'predict'

0 个答案:

没有答案