我在使用一段 Python 代码时遇到了一些麻烦。我有一个名为 "big.txt" 的大文本文件。我的代码先遍历它,把每个单词放入一个数组(列表),然后再遍历一次,删除所有不在字母表中的字符。我还有一个名为 worddistance 的函数,它比较两个单词的相似程度并返回一个分数。另外还有一个名为 autocorrect 的函数:我希望把拼写错误的单词传给它,然后打印一句 'Did you mean...',其中列出 worddistance 得分较低的单词(每发现一处差异,该函数就给计数器加 1——得分越低,两个单词越相似)。
奇怪的是,我不断收到这个错误:
"IndexError: string index out of range"
我对正在发生的事感到茫然!
我的代码如下。
提前感谢回复,
塞缪尔·诺顿
# Build the word list: read big.txt, lowercase every whitespace-separated
# token, then keep only the characters a-z of each token.
f = open("big.txt", "r")  # NOTE(review): the file is never closed in this snippet
words = list()
temp_words = list()
for line in f:
    for word in line.split():
        temp_words.append(word.lower())
allowed_characters = 'abcdefghijklmnopqrstuvwxyz'
for item in temp_words:
    temp_new_word = ''
    for char in item:
        if char in allowed_characters:
            temp_new_word += char
        else:
            continue
    # A token containing no allowed characters is appended as an empty string.
    words.append(temp_new_word)
# NOTE(review): this sorts a temporary list that is discarded immediately;
# `words` itself stays unsorted and keeps its duplicates.
list(set(words)).sort()
def worddistance(word1, word2):
    """Return a difference score for two words (lower means more similar).

    The score starts as the difference in length and is incremented once
    for each compared position whose characters differ.

    NOTE(review): as posted, two words of equal length enter neither branch
    and always score 0, and both loops run one index past the end of the
    shorter word, which raises "IndexError: string index out of range"
    whenever the lengths differ.
    """
    counter = 0
    if len(word1) > len(word2):
        counter += len(word1) - len(word2)
        new_word1 = word1[:len(word2) + 1]
        # The last value of char is len(word2), which is not a valid index
        # into word2.
        for char in range(0, len(word2) + 1) :
            if word2[char] != new_word1[char]:
                counter += 1
            else:
                continue
    elif len(word2) > len(word1):
        counter += len(word2) - len(word1)
        # NOTE(review): new_word2 is assigned but never read; the comparison
        # below indexes word2 directly.
        new_word2 = word2[:len(word1) + 1]
        # The last value of char is len(word1), which is not a valid index
        # into word1.
        for char in range(0, len(word1) + 1):
            if word1[char] != word2[char]:
                counter += 1
            else:
                continue
    return counter
def autocorrect(word):
    """Print spelling feedback for ``word``.

    Prints "The spelling is correct." when ``word`` is found in the
    module-level ``words`` list. Otherwise it collects every entry of
    ``words`` whose ``worddistance`` to ``word`` is exactly 1 and prints
    them after "Did you mean: ". Nothing is returned.
    """
    # NOTE(review): the lowercased copy is discarded; ``word`` is unchanged.
    word.lower()
    if word in words:
        print("The spelling is correct.")
        return
    else:
        suggestions = list()
        for item in words:
            diff = worddistance(word, item)
            if diff == 1:
                suggestions.append(item)
        print("Did you mean: ", end = ' ')
        if len(suggestions) == 1:
            print(suggestions[0])
            return
        else:
            for i in range(0, len(suggestions)):
                # NOTE(review): "suggestons" is not defined in this snippet;
                # "suggestions" was presumably intended.
                if i == len(suggestons) - 1:
                    print("or " + suggestions[i] + "?")
                    return
                print(suggestions[i] + ", ", end="")
            return
答案 0 :(得分:0)
在 worddistance() 中,
for char in range(0, len(word1) + 1):
应该改为:
for char in range(len(word1)):
同样,
for char in range(0, len(word2) + 1) :
应该改为:
for char in range(len(word2)):
顺便说一下,list(set(words)).sort()
正在排序一个临时列表,这可能不是你想要的。它应该是:
words = sorted(set(words))
答案 1 :(得分:0)
如其他回答中所述,循环范围应该使用 range(len(word1))。
除此之外:
- 您还应该考虑 word1 和 word2 长度相同的情况,即 len(word2) == len(word1)。
- 您还应该注意变量命名。在 worddistance 函数的第二个分支中,
if word1[char] != word2[char]:
应该与 new_word2 进行比较:
if word1[char] != new_word2[char]:
- 在 autocorrect 中,应该把转换为小写后的结果重新赋值给 word:word = word.lower()
# Strip every character outside a-z from each collected token, then
# de-duplicate the cleaned words and store them in sorted order.
words = sorted({
    ''.join(letter for letter in token if letter in allowed_characters)
    for token in temp_words
})
def worddistance(word1, word2):
    """Score how different two words are; 0 means they are identical.

    The score is the difference in length plus the number of positions,
    counted over the length of the shorter word, at which the two words
    hold different characters.
    """
    # sorted() is stable, so equal-length words keep their given order.
    shorter, longer = sorted((word1, word2), key=len)
    score = len(longer) - len(shorter)
    for index, letter in enumerate(shorter):
        if longer[index] != letter:
            score += 1
    return score
def autocorrect(word):
    """Print spelling feedback for ``word``.

    The word is lowercased and looked up in the module-level ``words``
    collection. If it is present, "The spelling is correct." is printed.
    Otherwise every entry of ``words`` whose ``worddistance`` to the word
    is exactly 1 is offered as a suggestion on a single line, e.g.
    "Did you mean: cat, cot, or cut?".

    Args:
        word: The word to check.

    Returns:
        None. All results are written to standard output.
    """
    # str.lower() returns a new string, so the result must be re-bound.
    word = word.lower()
    if word in words:
        print("The spelling is correct.")
        return

    suggestions = [item for item in words if worddistance(word, item) == 1]

    # Avoid printing a dangling "Did you mean: " when nothing is close enough.
    if not suggestions:
        print("No suggestions found.")
        return

    if len(suggestions) == 1:
        print("Did you mean: " + suggestions[0])
        return

    # Build the whole sentence at once so the final suggestion appears only
    # once, introduced by "or" and closed with the question mark.
    leading, final = suggestions[:-1], suggestions[-1]
    print("Did you mean: " + ", ".join(leading) + ", or " + final + "?")
下次请尝试使用 enumerate 这样的 Python 内置函数,避免先写 for i in range(len(list)) 再用 list[i] 取元素;也可以用 len 代替手动维护的计数器,等等。
例如,你的距离函数可以这样写,甚至还可以更简单:
def distance(word1, word2):
    """Return the length difference plus the count of mismatched positions.

    Characters are compared pairwise from the start of both words; the
    comparison stops at the end of the shorter word.
    """
    length_gap = abs(len(word1) - len(word2))
    # zip() stops at the shorter word, so no explicit slicing is needed.
    mismatches = sum(1 for left, right in zip(word1, word2) if left != right)
    return length_gap + mismatches