Hi, can someone help me with this code? This is the terminal error:

  File "bayes1.py", line 74, in <module>
    print classifier.classify(extract_features(tweet.split()))
AttributeError: 'list' object has no attribute 'split'

Could you also create a function that removes the stop words listed in a .txt file? (There is a sketch of one after the code below.) Thank you very much.
import nltk
import csv
from nltk.classify.naivebayes import NaiveBayesClassifier
import sys
import codecs
twitterData = sys.argv[1] # tweet input file (.csv)
def tweet_dict(twitterData):
    ''' (file) -> list of str
    Read the .csv file and return a list holding the
    first column (the tweet text) of every row.
    '''
    twitter_list_dict = []
    twitterfile = open(twitterData)
    twitterreader = csv.reader(twitterfile)
    for line in twitterreader:
        twitter_list_dict.append(line[0])
    twitterfile.close()
    return twitter_list_dict
def get_words_in_tweets(tweets):
    all_words = []
    for (words, sentiment) in tweets:
        all_words.extend(words)
    return all_words
def get_word_features(wordlist):
    # Build a frequency distribution; its keys are the feature words.
    wordlist = nltk.FreqDist(wordlist)
    word_features = wordlist.keys()
    return word_features
def read_tweets(fname, t_type):
    tweets = []
    f = open(fname, 'r')
    line = f.readline()
    while line != '':
        tweets.append([line, t_type])
        line = f.readline()
    f.close()
    return tweets
def extract_features(document):
    # word_features is the module-level list built from the training data below.
    document_words = set(document)
    features = {}
    for word in word_features:
        features['contains(%s)' % word] = (word in document_words)
    return features
# read in positive and negative training tweets
pos_tweets = read_tweets('amazon_positive.txt', 'positive')
neg_tweets = read_tweets('amazon_negative.txt', 'negative')

tweets = []
for (words, sentiment) in pos_tweets + neg_tweets:
    words_filtered = [e.lower() for e in words.split() if len(e) >= 3]
    tweets.append((words_filtered, sentiment))
# extract the word features out from the training data
word_features = get_word_features(get_words_in_tweets(tweets))
training_set = nltk.classify.util.apply_features(extract_features, tweets)
classifier = NaiveBayesClassifier.train(training_set)

# show_most_informative_features() prints its table itself and returns None,
# so it should not be wrapped in a print statement.
classifier.show_most_informative_features()
# tweet_dict() returns a list of tweets, so the list itself has no .split()
# (this is what raised the AttributeError above); classify each tweet instead.
tweets_to_classify = tweet_dict(twitterData)
for tweet in tweets_to_classify:
    print classifier.classify(extract_features(tweet.split()))
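
For the stop-word question: below is a minimal sketch, assuming the stop words live in a plain-text file with one word per line (stopwords.txt, load_stopwords, and remove_stopwords are placeholder names, not part of the original script). If the NLTK stopwords corpus is downloaded, nltk.corpus.stopwords.words('english') could be used instead of a file.

import codecs

def load_stopwords(fname='stopwords.txt'):
    # Read one stop word per line, skipping blank lines.
    stop_words = set()
    f = codecs.open(fname, 'r', 'utf-8')
    for line in f:
        word = line.strip().lower()
        if word:
            stop_words.add(word)
    f.close()
    return stop_words

def remove_stopwords(words, stop_words):
    # Keep only the words that are not in the stop word set.
    return [w for w in words if w.lower() not in stop_words]

One way to hook this into the training loop above would be words_filtered = remove_stopwords(words.split(), stop_words) before applying the len(e) >= 3 filter.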