我使用MLPCalssifier创建了类NeuralNetworkModel:
@staticmethod
def _init_pipeline(stopwords):
pipe_line = Pipeline([
("vect", CountVectorizer(stop_words=stopwords, lowercase=True)), # bag-of-words
("tfidf", TfidfTransformer()), # tf-idf
("clf", MLPClassifier(hidden_layer_sizes=(15, 15), activation="relu", solver="sgd", max_iter=3000, tol=1e-6, verbose=True)) # model mlp
])
return pipe_line
然后我使用模型训练我的数据集:
class TextClassificationPredict(object):
def __init__(self):
self.test = None
def save_model(self, filePath):
outfile = open(filePath, 'wb')
pickle.dump(obj, outfile)
outfile.close()
def get_train_data(self, train_data, test_data):
df_train = DataFrame(train_data)
df_test = DataFrame(test_data)
# init model neuralnetwork
model = NeuralNetworkModel()
time_start_train = time.time()
clf = model.clf.fit(df_train["content"], df_train.category)
time_stop_train = time.time()
time_train = time_stop_train - time_start_train
print("training time:", time_train) # the time would be round to 3 decimal in seconds
time_start_predict = time.time()
predicted = clf.predict(df_test["content"])
time_stop_predict = time.time()
time_predict = time_stop_predict - time_start_predict
print("predict time:", time_predict)
print("Accuracy", accuracy_score(df_test["category"], predicted))
# print(f1_score(df_test["category"], predicted, average=None))
print(clf.predict_proba(df_test["content"]))
我在main中创建了对象:
if __name__ == '__main__':
train_data = DataLoader(dataPath=Settings.DATA_TRAIN_PATH).get_json()
test_data = DataLoader(dataPath=Settings.DATA_TEST_PATH).get_json()
tcp = TextClassificationPredict()
tcp.get_train_data(train_data, test_data)
tcp.save_model(filePath='neural_network_model1515full.pk')
最后,我想使用保存在'neural_network_model1515full.pk'中的模型,所以我写道:
with open('neural_network_model1515full.pk', 'rb') as fin:
clf = pickle.load(fin)
clf.predict(input)
以下是此脚本的输出: AttributeError:'TextClassificationPredict'对象没有属性'predict'