Question

我几乎没有编程方面的实践经验，但是我已经开始学习python，并希望创建一个用于计算文本中最常用单词的函数。现在，我确定我的版本不是执行此操作的最佳方法，但是它可以工作：

 import os

 punctuation = "~!@#$%^&*()_-=+[{]}\\|'\";:,<.>/?"

 def remove_punctuation(text):
     """Return ``text`` with every character found in ``punctuation`` removed.

     ``punctuation`` is the module-level string of characters to strip.
     """
     kept_chars = (char for char in text if char not in punctuation)
     return "".join(kept_chars)

 # Read the whole file in lower case, strip punctuation, and tally how often
 # each whitespace-separated word occurs.
 with open(r'New Text Document.txt') as f:
     text = f.read().lower()
     t = remove_punctuation(text).split()
     dictionary = {}
     for word in t:
         # .get() supplies 0 for a word that has not been seen yet.
         dictionary[word] = dictionary.get(word, 0) + 1

 print(dictionary)

 def top_five(d):
     """Return the five most frequent words of ``d`` as a new dictionary.

     d: dictionary mapping each word to its occurrence count.

     The result maps word -> count and is ordered from most to least
     frequent. Words with equal counts keep the order in which they appear
     in ``d``. If ``d`` holds fewer than five words, all of them are
     returned; an empty ``d`` gives an empty dictionary.
     """
     # sorted() is stable even with reverse=True, so ties are resolved by the
     # original insertion order of ``d``.
     ranked = sorted(d.items(), key=lambda item: item[1], reverse=True)
     return dict(ranked[:5])

 print(top_five(dictionary))

上面的代码将给出以下输出：

{'word1': freq1, 'word2': freq2, 'word3': freq3, 'word4': freq4, 'word5': freq5}

尽管这是我想要的结果，但我已尝试简化我的函数，并让用户选择要统计多少个单词的频率：

 def top_five(d,n):
     """Attempt to collect the ``n`` most frequent words (non-working draft).

     d: word -> count dictionary (NOTE: the body never reads ``d``; it
        iterates the module-level ``dictionary`` instead)
     n: number of words to collect

     Builds ``values`` with keys "value1".."value<n>" and ``top`` with keys
     "top1".."top<n>", then prints and returns ``top``.
     """

     top = {}
     values = {}
     # Pre-fill the running maximum for every rank with 0.
     for i in range(1,n+1):
         values["value"+str(i)]=0
     # Pre-fill a placeholder entry for every rank.
     for i in range(1,n+1):
         top["top"+str(i)]=0

     for i in range(1,n+1):
         for key in dictionary :
             # ``top`` is keyed by "top1", "top2", ... rather than by word, so
             # ``key not in top`` does not exclude words chosen for earlier ranks.
             if values["value"+str(i)] < dictionary[key] and key not in top:
                 values["value"+str(i)] = dictionary[key]
                 # Stores a nested one-entry dictionary {word: count} under "top<i>".
                 top["top"+str(i)] = {key:values["value"+str(i)]}
             else:
                 continue
         # ``top1`` is not defined anywhere in this function, so this line
         # raises NameError unless a global named ``top1`` happens to exist.
         top.update(top1)
     print(top)
     return top

此代码将创建一个可以在循环中使用的、包含 value1、value2 等键的字典，以及另一个包含 top1、top2 等键的字典，但是它不起作用，因为 `and key not in top` 这个条件不会生效。

top["top"+str(i)] = {key:values["value"+str(i)]}

这将在字典中创建字典。我被困在这里，因为我找不到使“ top”字典有用或在循环内迭代变量名的方法。我已经读过应该使用列表或字典，并且变量名迭代不是一个好主意，但是我不明白为什么这样做，并且我想不出一种使列表或字典在for循环中有用的方法。

正如我所说，我知道这可能不是实现这种功能的最佳方法，但我的问题是：如何简化已经完成的工作并使循环正常工作？

谢谢！

Answer 1

我已经按照Barmar的建议更新了代码：

def remove_punctuation(text):
    """Return ``text`` with all punctuation characters removed.

    text: the string to clean.

    Every character listed in ``punctuation`` below is dropped; all other
    characters (letters, digits, whitespace) are kept in their original
    order.
    """
    punctuation = "~`!@#$%^&*()_-=+[{]}\\|'\";:,<.>/?"
    # str.translate deletes the listed characters in a single pass instead of
    # rebuilding the string one character at a time.
    return text.translate(str.maketrans("", "", punctuation))

def count_words(file):
    """Count how often each word occurs in the text file ``file``.

    The file content is stripped of punctuation, lower-cased and split on
    whitespace. Returns a dictionary mapping each word to its count.
    """
    with open(file) as handle:
        words = remove_punctuation(handle.read()).lower().split()

    counts = {}
    for token in words:
        # .get() supplies 0 the first time a word is seen.
        counts[token] = counts.get(token, 0) + 1
    return counts

def dict_sort(d, reverse=False):
    """Sort dictionary ``d`` by its values.

    d: dictionary to sort (it is not modified)
    reverse: False (default) sorts in ascending order, True in descending
        order

    Returns a tuple ``(key_list, value_list, sorted_dict)``: the keys in
    sorted order, the matching values, and a new dictionary built in that
    order. Entries with equal values keep their original relative order.
    Recommended call format: ``a, b, c = dict_sort(d)``
    """
    # sorted() is stable (also with reverse=True), which matches the order
    # produced by an adjacent-swap sort that only swaps on strict inequality.
    ordered = sorted(d.items(), key=lambda item: item[1], reverse=bool(reverse))
    key_list = [key for key, _ in ordered]
    value_list = [value for _, value in ordered]
    sorted_dict = dict(ordered)
    return key_list, value_list, sorted_dict

def word_freq():
    """Ask the user how many of the most frequent words to plot.

    Keeps prompting until the input parses as an integer between 1 and 10
    (inclusive) and returns that integer.
    """
    retry_message = "Please input an integer between 1 and 10:"
    while True:
        answer = input("How many of the most frequent words would you like to display?\n")
        try:
            n_freq = int(answer)
        except ValueError:
            # Not an integer at all: ask again.
            print(retry_message)
            continue
        if 1 <= n_freq <= 10:
            return n_freq
        # An integer, but outside the accepted range: ask again.
        print(retry_message)

def graph_word_freq(n,f,w):
    """Draw a turtle bar chart of the most frequent words in a text.

    n: number of words to plot; clamped to the number of entries available
       in ``f`` and ``w``
    f: word frequency list; ``f[0]`` selects the chart scale (the caller in
       this file passes the list sorted in descending order)
    w: word list, parallel to ``f``

    Returns None. When there is nothing to plot (``n`` < 1 or an empty
    list), the function returns without opening a window.
    """
    # Never index past the data that is actually available.
    n = min(n, len(f), len(w))
    if n < 1:
        return

    import turtle                                       #import turtle module
    window = turtle.Screen()                            #create screen
    window.bgcolor("honeydew")                          #define screen color
    window.title("Most Frequent Words")                 #set window title
    if f[0] < 960:
        # Small counts: fixed world size and narrow bars.
        y = 500
        y_pos = -480
        width = 60
        spacing = 20
        x_pos = -(30+40*(n-1))
    else:
        # Large counts: scale the world to the tallest bar, wider bars.
        width = 100
        spacing = 40
        y = f[0]/2+20
        y_pos = -f[0]/2
        x_pos = -(50+70*(n-1))

    turtle.setworldcoordinates(-y,-y,y,y)
    tortoise = turtle.Turtle()                          #create turtle
    tortoise.hideturtle()                               #hide turtle stamp
    tortoise.penup()                                    #raise turtle pen
    tortoise.setposition(x_pos,y_pos)                   #position turtle
    tortoise.pendown()                                  #put turtle pen down
    tortoise.speed(5)                                   #set drawing speed

    # The colour bands are based on the spread between the top frequency and
    # the first frequency that is NOT plotted. When every entry is plotted
    # there is no such entry, so 0 is used instead of indexing past the end.
    reference = f[n] if n < len(f) else 0
    low_cut = (f[0]-reference)/3
    high_cut = (f[0]-reference)/1.5

    for i in range(n):
        magnitude = abs(f[i])
        if magnitude < low_cut:
            tortoise.color("SeaGreen","ForestGreen")    #lowest band
        elif magnitude < high_cut:
            tortoise.color("orange","gold")             #middle band
        else:
            tortoise.color("coral3","IndianRed")        #highest band

        tortoise.begin_fill()                           #begin drawing shapes
        tortoise.left(90)
        tortoise.forward(f[i])                          #draw bar height
        tortoise.right(90)
        tortoise.forward(1/3*width)                     #prepare for text
        if f[i] >= 0:
            tortoise.write(f[i])                        #write value
        else:
            # Negative value: step 15 units along the current heading with
            # the pen up, write the label, then step back.
            tortoise.penup()
            tortoise.right(90)
            tortoise.forward(15)
            tortoise.write(f[i])
            tortoise.forward(-15)
            tortoise.left(90)
            tortoise.pendown()
        tortoise.forward(2/3*width)                     #bar width
        tortoise.right(90)
        tortoise.forward(f[i])                          #back down to baseline
        tortoise.left(90)
        # Pen up: step 25 units off the baseline, write the word, return.
        tortoise.penup()
        tortoise.right(90)
        tortoise.forward(25)
        tortoise.left(90)
        tortoise.forward(-2/3*width)
        tortoise.write(w[i])                            #write word
        tortoise.forward(2/3*width)
        tortoise.left(90)
        tortoise.forward(25)
        tortoise.right(90)
        tortoise.forward(spacing)                       #spacing
        tortoise.pendown()
        tortoise.end_fill()                             #stop drawing shapes
    turtle.exitonclick()

# Count every word in the input file.
dictionary = count_words("New Text Document.txt")

# Order the words from most to least frequent; ``words`` and ``values`` are
# parallel lists and ``dictionary`` is rebound to the sorted dictionary.
words,values,dictionary = dict_sort(dictionary, reverse = True)

# Ask the user how many of the top words to plot.
n_freq = word_freq()

# Draw the bar chart of the n_freq most frequent words.
graph_word_freq(n_freq,values,words)

现在可以正常使用了。谢谢，队友！

Answer 2

我理解您想自己实现冒泡排序，而不是使用 Python 内置的高效排序，并且想自己统计单词，而不是使用 Counter；那么让我们精简代码，充分利用 Python 的惯用法，并减轻那只可怜的乌龟的负担：

Location

循环内迭代变量名

2 个答案: