ValueError:无法将字符串转换为浮点数(could not convert string to float)

时间:2019-12-09 00:58:18

标签: python

import re

    class WordStatistic:
        """Running average score and occurrence count for a single keyword.

        Attributes:
            keyword: The word being tracked.
            averageScore: Mean of all scores recorded for the word so far.
            occurences: Number of scores that went into ``averageScore``.
        """

        def __init__(self, keyword, averageScore=0, occurences=0):
            """Store the keyword with its initial average and count."""
            self.keyword = keyword
            self.averageScore = averageScore
            self.occurences = occurences

        def getWord(self):
            """Return the tracked keyword."""
            return self.keyword

        def getAverageScore(self):
            """Return the current average score."""
            return self.averageScore

        def getOccurences(self):
            """Return how many scores have been recorded."""
            return self.occurences

        def addNewScore(self, newScore):
            """Fold ``newScore`` into the running average and bump the count."""
            oldScoreSum = self.averageScore * self.occurences
            self.occurences = self.occurences + 1
            # float() forces true division even for integer scores on Python 2.
            self.averageScore = (oldScoreSum + newScore) / float(self.occurences)

        def printWordStatistic(self):
            """Print the keyword, its occurrence count and its average score."""
            # Each line is one pre-formatted string so the value is printed on
            # both Python 2 and Python 3; `print (label), value` drops the value
            # on Python 3.
            print("Word          : {}".format(self.keyword))
            print("Occurences    : {}".format(self.occurences))
            # The average is reported here, not the occurrence count.
            print("Average Score : {}\n\n".format(self.averageScore))

# Starting Training Phase

# Maps each word seen in the training data to its WordStatistic.
wordDictionary = {}
# The context manager closes the file even if reading raises.
with open("movieReviews.txt", 'r') as fileInstance:
    fileText = fileInstance.read()

# Assuming that each review is separated by the following delimiter
reviewSplits = fileText.split("$$EOD_REVIEW$$")
for review in reviewSplits:
    review = review.strip()
    if review == "":
        continue
    lineSplits = review.split("\n")
    # The first line starts with the score. Only its first token is parsed as
    # a number, so a line such as "1 A series of escapades ..." no longer
    # raises ValueError; anything after the score on that line is review text.
    headerParts = lineSplits[0].split(None, 1)
    score = float(headerParts[0])
    textLines = headerParts[1:] + lineSplits[1:]
    for line in textLines:
        # Splitting out the words in each line of the review
        for word in re.split("\t| ", line):
            if word == "":
                continue
            # If the word is already present, update its score and count;
            # otherwise add a new entry to the dictionary.
            # `in` replaces dict.has_key(), which was removed in Python 3.
            if word in wordDictionary:
                wordDictionary[word].addNewScore(score)
            else:
                wordDictionary[word] = WordStatistic(word, score, 1)

# Training Phase Completed


# To print the statistics of all words in the dictionary
def printAllWordStatistic(wordDictionary):
    """Call ``printWordStatistic()`` on every value stored in ``wordDictionary``."""
    entries = wordDictionary.values()
    for entry in entries:
        entry.printWordStatistic()

# To rate a review based on the training data
def calculateAverageOfReview(review):
    """Rate a review as the mean of the average scores of its known words.

    Args:
        review: Review text; words may be separated by spaces, tabs or
            newlines.

    Returns:
        The mean of ``getAverageScore()`` over every word of ``review`` that
        is a key of the module-level ``wordDictionary``, or ``-1`` when no
        word of the review is in the dictionary.
    """
    # str.replace() returns a new string, so calling it without using the
    # result never normalised tabs or newlines. split() with no argument
    # breaks on any run of whitespace.
    wordSplits = review.split()

    scoreSum = 0.0
    totalCount = 0
    for word in wordSplits:
        # `in` replaces dict.has_key(), which was removed in Python 3.
        if word in wordDictionary:
            scoreSum += wordDictionary[word].getAverageScore()
            totalCount += 1
    if totalCount != 0:
        return scoreSum / totalCount
    return -1


# User Review Input
# raw_input() exists only on Python 2; Python 3 renamed it to input().
try:
    _readLine = raw_input
except NameError:
    _readLine = input

# Keep asking for reviews until the user declines to continue.
while True:
    print("\nEnter a review, (enter empty-line to save) : ")
    # Collect lines until the first empty one, which ends the review.
    multiLines = []
    while True:
        line = _readLine()
        if not line:
            break
        multiLines.append(line)
    inputReview = '\n'.join(multiLines)

    averageScore = calculateAverageOfReview(inputReview)
    # -1 is returned when the review contains no word from the training data.
    if averageScore == -1:
        print("Unable to rate the review")
    elif averageScore >= 2.50:
        print("Positive Review")
    else:
        print("Negative Review")

    if _readLine("\nDo you want to continue ? (Y/N) : ") != "Y":
        print("Quitting the session. Good Bye !")
        # Same effect as exit(), without relying on the site-module helper.
        raise SystemExit

我正在尝试读取电影评论文件,并根据用户输入的评论给出评分,但运行时出现了下面的转换错误:

Traceback (most recent call last):
  File "C:/Users/Adil Ali/Documents/moviereview.py", line 44, in <module>
    score = float(lineSplits[0].strip("\""))
ValueError: could not convert string to float: '1 A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story .\t'

我查找过与此问题类似的解决方案,但没有找到。我是应该给 strip 函数传入某个字符串参数,还是需要对代码做其他修改?请告诉我。这个项目是从一个包含电影评论的文本文件中读取数据的。

1 个答案:

答案 0 :(得分:0)

错误信息表明,当您执行

 score = float(lineSplits[0].strip("\""))

时,实际上是在尝试执行

 score = float('1 A series of escapades ...')

这个字符串无法转换为数字。您只需要取出该元素开头的 1。可以使用 split(" ")[0] 把这一行拆分成由各个元素组成的列表,并取第一个元素,也就是 1:

 score = float(lineSplits[0].strip("\"").split(" ")[0] )