import re
class WordStatistic:
    """Running score statistics for a single keyword.

    Stores the keyword, the number of scores recorded for it and the
    mean of those scores.
    """

    def __init__(self, keyword, averageScore=0, occurences=0):
        """Create the statistic for ``keyword``.

        keyword      -- the word being tracked.
        averageScore -- initial mean score (default 0).
        occurences   -- number of scores already folded into that mean
                        (default 0).
        """
        self.keyword = keyword
        self.averageScore = averageScore
        self.occurences = occurences

    def getWord(self):
        """Return the tracked keyword."""
        return self.keyword

    def getAverageScore(self):
        """Return the current mean score."""
        return self.averageScore

    def getOccurences(self):
        """Return how many scores have been recorded."""
        return self.occurences

    def addNewScore(self, newScore):
        """Fold ``newScore`` into the running mean and bump the count."""
        # Rebuild the previous total from mean * count, then re-average.
        oldScoreSum = self.averageScore * self.occurences
        self.occurences = self.occurences + 1
        # float() forces true division even for integer scores on Python 2.
        self.averageScore = float(oldScoreSum + newScore) / self.occurences

    def printWordStatistic(self):
        """Print the keyword, its count and its mean score to stdout."""
        # One pre-formatted argument per print call: this prints the same
        # text whether print is a statement (Python 2) or a function.
        print("Word : {}".format(self.keyword))
        print("Occurences : {}".format(self.occurences))
        # The original printed the occurrence count on this line by mistake.
        print("Average Score : {}\n\n".format(self.averageScore))
# Starting Training Phase
# Builds wordDictionary: word -> WordStatistic holding the mean score of
# every review the word appeared in.
wordDictionary = {}

# The context manager closes the file even if reading raises.
with open("movieReviews.txt", 'r') as fileInstance:
    fileText = fileInstance.read()

# Assuming that each review is separated by the following delimiter
reviewSplits = fileText.split("$$EOD_REVIEW$$")
for review in reviewSplits:
    review = review.strip()
    if review == "":
        continue

    # Each review starts with its score. The score may sit alone on the
    # first line or be followed by review text on that same line
    # (e.g. "1 A series of escapades ..."), so only the first
    # whitespace-separated token is converted to a number.
    lineSplits = review.split("\n")
    firstLineParts = lineSplits[0].strip().split(None, 1)
    # strip('"') tolerates a score token wrapped in double quotes.
    score = float(firstLineParts[0].strip("\""))

    # Any text after the score on the first line is review text too.
    textLines = firstLineParts[1:] + lineSplits[1:]
    for line in textLines:
        # str.split() with no argument splits on any whitespace run and
        # never yields empty strings.
        for word in line.split():
            # If it is already present, update the score and count;
            # otherwise add a new entry to the dictionary.
            if word in wordDictionary:
                wordDictionary[word].addNewScore(score)
            else:
                wordDictionary[word] = WordStatistic(word, score, 1)
# Training Phase Completed
# To print the statistics of all words in the dictionary
def printAllWordStatistic(wordDictionary):
    """Call ``printWordStatistic()`` on every value of ``wordDictionary``.

    wordDictionary -- mapping whose values provide a
                      ``printWordStatistic()`` method.

    Returns None; an empty mapping prints nothing.
    """
    for wordStatistic in wordDictionary.values():
        wordStatistic.printWordStatistic()
# To rate a review based on the training data
def calculateAverageOfReview(review, dictionary=None):
    """Return the mean trained score of the known words in ``review``.

    review     -- review text; words may be separated by spaces, tabs
                  or newlines.
    dictionary -- optional mapping of word -> object exposing
                  ``getAverageScore()``. Defaults to the module-level
                  ``wordDictionary``.

    Returns the average of ``getAverageScore()`` over every word of the
    review found in the dictionary (repeated words count each time), or
    -1 when no word of the review is known.
    """
    if dictionary is None:
        dictionary = wordDictionary

    scoreSum = 0.0
    knownWords = 0
    # str.split() with no argument splits on spaces, tabs and newlines.
    # The original called review.replace(...) but discarded the result,
    # so tabs and newlines were never treated as separators.
    for word in review.split():
        if word in dictionary:
            scoreSum += dictionary[word].getAverageScore()
            knownWords += 1

    if knownWords == 0:
        return -1
    return scoreSum / knownWords
# User Review Input
# Repeatedly reads a multi-line review from stdin, rates it with
# calculateAverageOfReview and prints the verdict.

# raw_input only exists on Python 2; Python 3 names the same function input.
try:
    readLine = raw_input
except NameError:
    readLine = input

while True:
    print("\nEnter a review, (enter empty-line to save) : ")
    # Collect lines until the user submits an empty one.
    multiLines = []
    while True:
        line = readLine()
        if not line:
            break
        multiLines.append(line)
    inputReview = '\n'.join(multiLines)

    averageScore = calculateAverageOfReview(inputReview)
    # -1 is the "no known word" sentinel returned by calculateAverageOfReview.
    if averageScore == -1:
        print("Unable to rate the review")
    elif averageScore >= 2.50:
        print("Positive Review")
    else:
        print("Negative Review")

    if readLine("\nDo you want to continue ? (Y/N) : ") != "Y":
        print("Quitting the session. Good Bye !")
        # exit() is a helper injected by the site module for interactive
        # use; raising SystemExit ends the script the same way without it.
        raise SystemExit
我想读取电影评论文件,并根据用户输入的评论给出评分,但运行时出现了下面的类型转换错误:
Traceback (most recent call last):
File "C:/Users/Adil Ali/Documents/moviereview.py", line 44, in <module>
score = float(lineSplits[0].strip("\""))
ValueError: could not convert string to float: '1 A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story .\t'
我查找过与此问题类似的解决方案,但没有找到。我应该给 strip 函数传入某个字符串,还是需要做其他修改?请告诉我。这个项目是从一个电影评论文本文件中读取数据的。
答案 0 :(得分:0)
错误信息表明,在下面这一行:
score = float(lineSplits[0].strip("\""))
您实际执行的是:
score = float('1 A series of escapades ...')
这个字符串无法直接转换为数字,您只需要取出该元素开头的 1。可以使用 split(" ")[0] 将该行拆分为元素列表并取第一个元素,即 1:
score = float(lineSplits[0].strip("\"").split(" ")[0] )