Question

我是初学者。我知道

的含义

Index Error: list index out of range.

但是我不明白这里出了什么问题。

此代码在11行上工作/打印，然后自动停止。

我该如何调试呢？

数据集链接：https://github.com/YoeriNijs/TweetAnalyzer`

代码：

import nltk, random, csv, sys
from nltk.corpus import names
from nltk.tokenize import word_tokenize
import naiveBayesClassifier
import textblob


def selectTweets(row):
    #print row

    tweetWords = []
    words = row[0].split()
    for i in words:
        i = i.lower()
        i = i.strip('@#\'"?,.!')
        tweetWords.append(i)

#all_words = set(word.lower() for passage in tweetWords for word in word_tokenize(passage[0]))    
#print "******************************888"
#print tweetWords 
row[0] = tweetWords
print "******************************555"
t = [({word: (word in word_tokenize(x[0])) for word in tweetWords}, x[1]) for x in row]
print t


if counter <= 49:
    trainTweets.append(row)
    #print "******************************000"
    #print trainTweets

else:
    testTweets.append(row)

trainTweets = []
testTweets = []


  print "Tweet Sentiment Analyzer by Yoeri Nijs"
  print "*" * 30


 while True:

# Ask for filename
filename =  str(raw_input("> Please enter a filename (.csv): "))

#Check if filename ends with .csv
if filename.endswith(".csv"):

    try:

        #Open file
        with open(filename, 'rb') as csvfile: 
            reader = csv.reader(csvfile, delimiter=';', quotechar='|')

            #Print succes message
            print "> File opened successfully!"

            counter = 0
            for row in reader:
                selectTweets(row)
                counter += 1

            print "> Wait a sec for the results..."

            cl = naiveBayesClassifier.trainer(trainTweets)

    print("Accuracy of the classifier: {0}".format(cl.accuracy(testTweets)))
            cl.show_informative_features(10)
            cl.show_informative_features(10)

            while True:

                tweetWords = []
                tweet =  str(raw_input("Please enter the text of the tweet you want to analize: "))
                words = tweet.split()
                for i in words:
                    i = i.lower()
                    i = i.strip('@#\'"?,.!')
                    tweetWords.append(i)
                tweet = ' '.join(tweetWords)
                print "> Analyzing the tweet"

                # Classify some text
                print "Sentiment of the tweet:", (cl.classify(tweet))

                while True:
                    print
                    repeat =  str(raw_input("> Do you want to check another tweet (y/n)? "))

                    if repeat == "n":
                        print "Exit program"
                        sys.exit()
                    if repeat != "y":
                        print "Something went wrong"
                    if repeat == "y":
                        break         

    #If file does not exist, display this
    except IOError:
        print "File does not exist."

#Else if file does not end with .csv, do this
else:
    print "Please open a file that ends with .csv"

我将每个火车数据除以********************************** 555，为什么最后一个数据打印'o'/' e'?? 这段代码产生了这个Erorr：

> Please enter a filename (.csv): data.csv
> File opened successfully!
******************************555
""print like it 11 time then it stop......""
******************************555

[({'google': False, 'tron': False, 'i': True, 'theme': False, 'ics': False, 
 'hate': False}, 'hate'), ({'google': False, 'tron': False, 'i': False, 
 'theme': False, 'ics': False, 'hate': False}, 'e')]
******************************555



Traceback (most recent call last):
  File "C:\Users\Nahid\Downloads\Compressed\TweetAnalyzer-    
 master_2\TweetAnalyzer-master\script.py", line 68, in <module>
    selectTweets(row)
  File "C:\Users\Nahid\Downloads\Compressed\TweetAnalyzer-     
 master_2\TweetAnalyzer-master\script.py", line 29, in selectTweets
    t = [({word: (word in word_tokenize(x[0])) for word in tweetWords}, 
 x[1]) for x in row]
IndexError: list index out of range

Answer 1

通过快速浏览，您以错误的模式打开了文件，二进制而不是文本。我运行代码时没有出现任何错误，也许是我误解了问题。

我承认，我不知道为什么您遇到了超出范围的异常，我无法重新创建错误

我所做的更改是：with open(filename, 'rt', encoding="utf8") as csvfile:

                                              ^here

我将模式更改为文本，并将编码更改为utf8，但我认为它仍将默认使用。

Tweet Sentiment Analyzer by Yoeri Nijs
******************************
> Please enter a filename (.csv): data.csv
> File opened successfully!
> Wait a sec for the results...
Accuracy of the classifier: 0.772
Most Informative Features
         contains(nexus) = True           positi : negati =     16.6 : 1.0
      contains(sandwich) = True           positi : negati =     14.6 : 1.0
           contains(ice) = True           positi : negati =     12.7 : 1.0
         contains(cream) = True           positi : negati =     12.7 : 1.0
         contains(looks) = True           positi : negati =      8.8 : 1.0
          contains(love) = True           positi : negati =      8.8 : 1.0
       contains(android) = True           positi : negati =      7.4 : 1.0
           contains(why) = True           negati : positi =      7.2 : 1.0
       contains(awesome) = True           positi : negati =      6.8 : 1.0
        contains(please) = True           negati : positi =      6.5 : 1.0
Please enter the text of the tweet you want to analize: ice
> Analyzing the tweet
Sentiment of the tweet: positive
> Do you want to check another tweet (y/n)? n
Exit program

错误抛出，（很多文本）

调试IndexError：列表索引超出范围

1 个答案: