我正在使用Texblob根据学生的评论对学生的情感进行分类-但代码在SA列下再次再现了相同的评论。 pip install textblob
已下载并更新,并且nltk.download()
上的所有软件包都是最新的。我还尝试了使用不同数据集的代码,但仍然显示相同的错误,即仅复制原始注释并将其再次粘贴到SA列下。
from textblob import TextBlob
import re
import collections
from collections import Counter
import csv
import os
import glob
import pandas as pd
import numpy as np
import nltk
import random
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
#nltk.download()
df1 = pd.read_csv('data.csv')
def clean_comments(text): #remove links and special characters using regex.
return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", text).split())
def analize_sentiment(text): #classify the polarity of a comment using textblob.
analysis = TextBlob(clean_comments(text))
if analysis.sentiment.polarity > 0:
return 1
elif analysis.sentiment.polarity == 0:
return 0
else:
return -1
# create a column with the result of the analysis:
df1['SA'] = np.array([ clean_comments(text) for text in df1['text'] ])