我正在运行以下代码,以基于两个标签Notes and Sentiment(范围为0-4)对注释执行情感分析。我正在尝试与预测一起获得标签(注释)。有人可以帮忙吗?
我尝试正常打印,但只给出数字。
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split # function for splitting data to train and test sets
import nltk
from nltk.corpus import stopwords
from nltk.classify import SklearnClassifier
from subprocess import check_output
data = pd.read_csv(r"notes.csv")
# Keeping only the neccessary columns
data = data[['NOTES','SCORE',]]
data['NOTES'] = data['NOTES'].astype(str)
data['SCORE'] = data['SCORE'].astype(str)
# print(data)
# print(data[data['NOTES'].str.contains('See comments below')==True])
#see distribution
print(data['SCORE'].groupby(data['SCORE']).count())
#Feature Generation using Bag of Words
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import RegexpTokenizer
#tokenizer to remove unwanted elements from out data like symbols and numbers
token = RegexpTokenizer(r'[a-zA-Z0-9]+')
cv = CountVectorizer(lowercase=True,stop_words='english',ngram_range = (1,1),tokenizer = token.tokenize)
text_counts= cv.fit_transform(data['NOTES'])
#split into training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
text_counts, data['SCORE'], test_size=0.3, random_state=1)
# Model Building and Evaluation
from sklearn.naive_bayes import MultinomialNB
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Generation Using Multinomial Naive Bayes
clf = MultinomialNB().fit(X_train, y_train)
predicted= clf.predict(X_test)
print("MultinomialNB Accuracy:",metrics.accuracy_score(y_test, predicted))
#notes
#TF-IDF METHOD:
from sklearn.feature_extraction.text import TfidfVectorizer
tf=TfidfVectorizer()
text_tf= tf.fit_transform(data['NOTES'])
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
text_tf, data['SCORE'], test_size=0.3, random_state=123)
print(X_test)
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
# Model Generation Using Multinomial Naive Bayes
clf = MultinomialNB().fit(X_train, y_train)
predicted= clf.predict(X_test)
data['Prediction'] = predicted
print("MultinomialNB Accuracy USING TF-IDF:",metrics.accuracy_score(y_test, predicted))
print(X_test[predicted])
输出应为以下内容: 注释,即“您好,这很好”,预测分数为“ 4”