我正在编写电影情感分析,在代码中我得到一个错误,即:带有基数10的int()的无效文字。代码读取包含电影评论及其分数的单独文本文件。 ex)4这部电影很棒。谢谢您的帮助!编辑:错误出现在这里:第38行得分= int(lineSplits [0] .strip())
import re
class WordStatistic:
def __init__(self, keyword, averageScore = 0, occurences = 0):
self.keyword = keyword
self.averageScore = averageScore
self.occurences = occurences
def getWord(self) :
return self.keyword
def getAverageScore(self) :
return self.averageScore
def getOccurences(self) :
return self.occurences
def addNewScore(self, newScore) :
oldScoreSum = self.averageScore * self.occurences
self.occurences = self.occurences + 1
self.averageScore = (oldScoreSum + newScore) / (self.occurences)
def printWordStatistic(self) :
print ("Word : ", self.keyword)
print ("Occurences : ", self.occurences)
print ("Average Score : ", self.occurences, "\n\n")
# "teaching" the code
wordDictionary = {}
fileInstance = open("movieReviews.txt",'r')
fileText = fileInstance.read()
# formatting and splitting
reviewSplits = fileText.split("movieReviews")
for review in reviewSplits :
review = review.strip()
if review == "" :
continue
lineSplits = review.split("\n")
score = int(lineSplits[0].strip())
for i in range(1, len(lineSplits)) :
wordSplits = re.split("\t| ", lineSplits[i])
for word in wordSplits :
if word == "" :
continue
# If it is already present, then update the score and count
# Otherwise just add the new entry to the dictionary
if wordDictionary in(word) :
wordStatistic = wordDictionary.get(word)
wordStatistic.addNewScore(score)
else :
wordStatistic = WordStatistic(word, score, 1)
wordDictionary[word] = wordStatistic
# print the stats of the words
def printAllWordStatistic(wordDictionary) :
for wordStatistic in wordDictionary.values() :
wordStatistic.printWordStatistic()
# rating the actual review
def calculateAverageOfReview(review) :
review.replace("\t", " ")
review.replace("\n", " ")
wordSplits = review.split(" ")
averageScore = 0.0
totalCount = 0;
for word in wordSplits :
if wordDictionary in (word) :
averageScore += wordDictionary.get(word).getAverageScore()
totalCount = totalCount + 1
if totalCount != 0 :
return averageScore / totalCount
return -1
# getting user input and append multi lines of case of multi line review
while (True) :
print ("\nEnter a review : ");
multiLines = []
while True:
line = input()
if line:
multiLines.append(line)
else:
break
inputReview = '\n'.join(multiLines)
averageScore = calculateAverageOfReview(inputReview)
if averageScore != -1 :
if averageScore >= 2.50 :
print ("Positive Review");
else :
print ("Negative Review");
else :
print ("Unable to rate the review");
if input("\nDo you want to continue ? (Y/N) : ") != "Y" :
print ("Quitting the session.");
exit()
答案 0 :(得分:1)
这意味着int
不知道如何处理非0-9
的字符。如果您有一些任意字符串要从中提取数字,可以使用正则表达式,而不是:
score = int(lineSplits[0].strip())
像
这样的东西score = int(re.search('[0-9]+', lineSplits[0]).group()))
将获取第一组数字。