我是python编程的新手,我想做一个情绪分析 通过word2vec根据亚马逊评论。 我的问题是我创建了三个函数,因为我必须对评论进行评论并分割单词。
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import SnowballStemmer
data = pd.read_csv('Reviews.csv')
print(data.head(4))
data['pos'] = np.where(data['Score'] > 3, 1, 0)
#suddivion the file that analyzed by the score ( from 1 to 5)
X_train, X_test, y_train, y_test = train_test_split(data['Text'],data['pos'], test_size = 0.2, random_state=0)
nltk.download()
# now start the problems
def pulitoretesto (prim_testo, rmv_stpwrds=False, stemming=False, split_testo=False):
testo = BeautifulSoup(prim_testo, 'lxml').get_text()
lett = re.sub("[^a-zA-Z]", " ",testo)
pr = lett.lower().split()
if rmv_stpwrds:
stop = set(stopwords.words("english"))
pr= [a for a in pr if not w in stop]
if stemming==True:
eliminsuf = SnowballStemmer('english')
pr = [eliminsuf.stem(a) for a in pr]
if split_testo==True:
return (pr)
return( " ".join(pr))
X_train_nuovo = []
for x in X_train:
X_train_nuovo.append(pulitoretesto(x))
print(' nuova X_train :\n', X_train_nuovo)
X_test_nuovo = []
for x in X_test:
X_test_nuovo.append(pulitoretesto(x))
tokenizer=nltk.data.load('tokenizers/punkt/english.pickle')
def parsfrasi(revi,tokenizer, rmv_stpwrds=False):
prmtv_frs=tokenizer.tokenize(revi.strip())
frasi = []
for prmtv_frs1 in prmtv_frs :
if len(prmtv_frs1) > 0 :
frasi.pulitoretesto(prmtv_frs1, rmv_stpwrds, split_testo=True )
return frasi
frasi = []
for revi in X_train_nuovo:
frasi += parsfrasi(revi, tokenizer)
当我在那里拟合代码时: AttributeError:' list'对象没有属性' pulitoretesto' 感谢大家=)
答案 0 :(得分:1)
我不确定究竟发生了什么,但函数中的一行是
frasi = []
然后你做
frasi.pulitoretesto(prmtv_frs1, rmv_stpwrds, split_testo=True )
无法完成,因为frasi
被声明为列表
如果你想调用函数'pulitoretesto'并将其添加到frasi,请执行以下操作:
frasi.append(pulitoretesto(prmtv_frs1, rmv_stpwrds, split_testo=True))