Question

我遇到了finds_anagrams函数的问题。我想创建一个字典，它是所有单词anagrams的列表。 sig变量是按字母顺序排列的单词的签名。这本词典取代了 d = {“word1”：[word2，word3，word4]，“word5”：[word10，word9，word20]等...} 输入文件包含英语单词列表

def get_signature(word):
    """takes a word breaks it into a list and sort it alph, and then
    back into a word
    """
    l = word.split()
    l.sort()    
    w = ''.join(l)
    return w    


def is_anagram(sig, word):
    """takes a given number of letters and compare to another 
    word to see if they are anagrams: must be same length
    """ 
    if len(word) != len(sig): #if words not same len return False
        print "here1"
        return False
    for ch in sig:
        if ch not in word:
            return False

    return True     


def finding_anagrams(fin):
    """Ex 12-4 this funcion reads a wordlist and find the words that makes up 
    the most anagrams
    """
    #read the wordlist file 
    d = {}
    for line in fin:
        word = line.strip().lower()
        sig = get_signature(word) #put the letters in alphabetical
        if sig not in d:
            d[sig] = []
        for l in fin:
            w = l.strip().lower()
            print w, sig, "here"
            if is_anagram(sig, w):
                d[sig].append(w)

    return d                    



def print_anagrams(number, d):
    """prints all anagrams of given word
    """
    for key, value in d.items():
        if len(key) == number:
            print key, d[key]

main()
    filein = open("words.txt")
    anagrams = finding_anagrams(filein)
    print_anagrams(5, anagrams) 


if __name__ == "__main__":
    main()

Answer 1

您正在迭代文件对象两次，在第一个内部循环之后，您已经耗尽了迭代器。使用defaultdict会更有效率，只需对单词进行排序调用也可以避免不必要的函数调用

def finding_anagrams(fin):
    """Ex 12-4 this funcion reads a wordlist and find the words that makes up
    the most anagrams
    """
    #read the wordlist file
    from collections import defaultdict
    d = defaultdict(list)
    lines = fin.readlines() # put in a list 
    for ind,line in enumerate(lines[:-1]):
        word = line.rstrip().lower()
        sig = "".join(sorted(word)) # this line does what get_signature does
        if any(is_anagram(sig, word.rstrip().lower()) for word in lines[ind+1:]): # check all words from current + 1 in the lines list 
            d[sig].append(word)
    return d

您还可以删除对is_anagram的需求，从而消除对sig的需求：

 if any(sorted(word) == sorted(w.rstrip().lower())) for w in lines[ind+1:])

Answer 2

import collections

def is_anagram(w1, w2):
    return collections.Counter(w1) == collections.Counter(w2)

def get_signature(word):
    return ''.join(sorted(word))

def find_anagrams(infilepath):
    answer = {}
    with open(infilepath) as infile:
        for line in infile:
            word = line.strip().lower()
            sig = get_signature(word)
            if sig not in answer:
                answer[sig] = set()
            answer[sig].add(word)
    return answer

def find_most_anagrams(infilepath):
    anagrams = find_anagrams(infilepath)
    most = max(anagrams, key=lambda k:len(anagrams[k]))
    print "The maximum number of anagrams are made with the letters", most, '.'
    print "The anagrams are:",
    print '\n\t'.join(anagrams[most])

finding_anagrams函数不是创建字谜字典

2 个答案: