Python Twitter情绪分析错误

时间:2017-10-30 13:51:35

标签: python twitter nlp geocoding sentiment-analysis

尝试执行以下代码时出现以下错误。我该如何解决?

Traceback (most recent call last):
  File "C:\Users\vaio\Desktop\coding\Twitter-Sentiment-Analysis-master\Twitter-Sentiment-Analysis-master\tweet_sentiment.py", line 64, in <module>
    main()
  File "C:\Users\vaio\Desktop\coding\Twitter-Sentiment-Analysis-master\Twitter-Sentiment-Analysis-master\tweet_sentiment.py", line 53, in main
    sent_file = open(sys.argv[1])
IndexError: list index out of range
[Finished in 0.1s with exit code 1]

import sys
import json
import ast
import re

def calcScoreFromTerm(termScoreFile):
    """Build a term -> score dictionary from tab-separated lines.

    Each non-blank line must contain a term and a numeric score separated
    by exactly one tab character. Blank (whitespace-only) lines, such as a
    trailing empty line at the end of the file, are skipped.

    Args:
        termScoreFile: An iterable of text lines (e.g. an open file).

    Returns:
        A dict mapping each term to its score converted to float.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields, or the score field is not a number.
    """
    scores = {}
    for line in termScoreFile:
        # A blank line would otherwise fail the two-value unpacking below.
        if not line.strip():
            continue
        term, score = line.split("\t")
        scores[term] = float(score)
    return scores

def getTweetText(tweet_file):
    """Collect the "text" field of every tweet in a line-delimited JSON file.

    Args:
        tweet_file: An open file-like object yielding one JSON document per
            line. It is closed before this function returns, even when
            parsing fails.

    Returns:
        A list with the "text" value of every JSON object that has that
        key, in file order.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
    """
    tweets = []
    try:
        for line in tweet_file:
            # Blank lines are not valid JSON; skip them rather than crash.
            if not line.strip():
                continue
            jsondata = json.loads(line)
            # Only JSON objects can carry a "text" key; other entries
            # (and objects without the key) are ignored.
            if isinstance(jsondata, dict) and "text" in jsondata:
                tweets.append(jsondata["text"])
    finally:
        tweet_file.close()
    return tweets

def filterTweet(et):
    """Split a tweet into alphanumeric words, dropping retweet/URL tokens.

    Every run of characters outside A-Z, a-z and 0-9 is treated as a word
    separator. Words starting with "RT", "www" or "http" are then removed.

    Args:
        et: The tweet text.

    Returns:
        A list of the remaining words, in their original order.
    """
    # Collapse each run of non-alphanumeric characters into a single space
    # so that split() yields clean tokens.
    words = re.sub('[^A-Za-z0-9]+', ' ', et).split()

    # Build a new list instead of calling words.remove() inside a loop over
    # words: removing while iterating shifts the remaining items and makes
    # the loop skip the token right after each removed one.
    return [w for w in words if not w.startswith(("RT", "www", "http"))]

def getTweetSentiments(tweets, scores):
    """Score each tweet by summing the scores of the words it contains.

    Args:
        tweets: An iterable of tweet text strings.
        scores: A dict mapping a term to its numeric score.

    Returns:
        A list of floats, one per tweet in input order. Words that are not
        keys of *scores* contribute nothing; a tweet with no known words
        scores 0.0.
    """
    sentiments = []

    for tweet in tweets:
        # The text is passed through without encoding to UTF-8 bytes:
        # filterTweet applies a text regex that already discards every
        # non-ASCII character, and feeding it bytes raises TypeError on
        # Python 3.
        wordsInTweet = filterTweet(tweet)
        known = (scores[word] for word in wordsInTweet if word in scores)
        sentiments.append(sum(known, 0.0))

    return sentiments

def main():
    """Print one sentiment score per tweet for the files given as arguments.

    Reads the term-score file named by sys.argv[1] and the tweet file named
    by sys.argv[2], then prints the sentiment of each tweet on its own line.
    Exits with a usage message when either argument is missing, instead of
    failing with an IndexError on sys.argv.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: python %s <sent_file_name> <tweet_file_name>"
                 % sys.argv[0])

    # Context managers make sure both files are closed even if parsing fails.
    with open(sys.argv[1]) as sent_file:
        scores = calcScoreFromTerm(sent_file)

    with open(sys.argv[2]) as tweet_file:
        tweets = getTweetText(tweet_file)

    sentiments = getTweetSentiments(tweets, scores)

    for sentiment in sentiments:
        # The parenthesised form prints the same under Python 2 and 3.
        print(sentiment)

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

1 个答案:

答案 0 :(得分:0)

看起来你应该提供 sent_file 和 tweet_file 作为命令的参数。所以你应该像这样调用它:

python tweet_sentiment.py <sent_file_name> <tweet_file_name>