Python: NLTK: returning a dictionary - only 1 value returned

Date: 2015-03-05 18:53:42

Tags: python dictionary nltk

Apologies for dumping my entire code block here (below). I have been trying to figure out what I am doing wrong, but unfortunately I have no idea.

For my thesis I have to classify tweets as neutral (0), negative (-1) or positive (1). I am attempting this with NLTK. The goal is for the code to return a dictionary in the form 'tweetA': '0', 'tweetB': '-1', and so on. At the moment, if I enter multiple tweets as input, I only get the result (i.e. -1/0/1) for the first tweet.

For example, if I put 'I love oranges' and 'I hate tomatoes' in the input, I only get '1' back instead of '1', '-1'.
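
In other words, for those two tweets I would hope to get back something shaped like this (purely an illustration, not actual output):

{'I love oranges': '1', 'I hate tomatoes': '-1'}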

If anyone could help me out, I would really appreciate it!

The code I have so far:

import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.tokenize import word_tokenize
from nltk.stem.snowball import SnowballStemmer

stemmer = SnowballStemmer("english", ignore_stopwords = True)
pos_tweets = ['I love bananas','I like pears','I eat oranges']
neg_tweets = ['I hate lettuce','I do not like tomatoes','I hate apples']
neutral_tweets = ['I buy chicken','I am boiling eggs','I am chopping vegetables']

def uni(doc):
    # Tokenize every tweet, then lowercase and stem each token longer
    # than two characters.
    stems = []
    for tweet in doc:
        for word in word_tokenize(tweet):
            if len(word) > 2:
                stems.append(stemmer.stem(word.lower()))
    return stems

def word_feats_uni(doc):
     return dict([(word, True) for word in uni(doc)])

def tokenizer_ngrams(document):
    # Return a list of token lists, one per tweet.
    all_tokens = []
    for sentence in document:
        all_tokens.append(word_tokenize(sentence))
    return all_tokens

def get_bi(document):
    # Collect the bigrams of every tokenized tweet.
    c = []
    for sentence in tokenizer_ngrams(document):
        c.extend(nltk.bigrams(sentence))
    return c

def get_tri(document):
    # Collect the trigrams of every tokenized tweet.
    c = []
    for sentence in tokenizer_ngrams(document):
        c.extend(nltk.trigrams(sentence))
    return c


def word_feats_bi(doc): 
    return dict([(word, True) for word in get_bi(doc)])

def word_feats_tri(doc):
    return dict([(word, True) for word in get_tri(doc)])

def word_feats_test(doc):
    # Merge the unigram, bigram and trigram features of every tweet in
    # doc into a single dictionary.
    feats_test = {}
    for tweet in doc:
        feats_test.update(word_feats_uni([tweet]))  # helpers expect a list of tweets
        feats_test.update(word_feats_bi([tweet]))
        feats_test.update(word_feats_tri([tweet]))
    return feats_test


pos_feats = [(word_feats_uni(pos_tweets),'1')] + [(word_feats_bi(pos_tweets),'1')] + [(word_feats_tri(pos_tweets),'1')]

neg_feats = [(word_feats_uni(neg_tweets),'-1')] + [(word_feats_bi(neg_tweets),'-1')] + [(word_feats_tri(neg_tweets),'-1')]

neutral_feats = [(word_feats_uni(neutral_tweets),'0')] + [(word_feats_bi(neutral_tweets),'0')] + [(word_feats_tri(neutral_tweets),'0')]

trainfeats = pos_feats + neg_feats + neutral_feats
classifier = NaiveBayesClassifier.train(trainfeats)
print (classifier.classify(word_feats_test(['I love oranges'])))
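
After staring at this for a while, my best guess is that classify only ever sees one merged feature dictionary, so it can only hand back one label. Below is a minimal, untested sketch of how I imagine building features and classifying per tweet instead, collecting tweet -> label pairs (classify_tweets is just a name I made up; it relies on the helpers and classifier defined above):

# Untested sketch: build a separate feature dictionary for each tweet
# and classify each one on its own.
# classify_tweets is a made-up helper name, not part of NLTK.
def classify_tweets(tweets):
    results = {}
    for tweet in tweets:
        feats = {}
        feats.update(word_feats_uni([tweet]))  # helpers expect a list of tweets
        feats.update(word_feats_bi([tweet]))
        feats.update(word_feats_tri([tweet]))
        results[tweet] = classifier.classify(feats)
    return results

print(classify_tweets(['I love oranges', 'I hate tomatoes']))
# which should print one label per tweet, i.e. a dictionary shaped
# like the example near the top of the question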

0 Answers:

No answers yet.