这是Edit distance
问题的实施。
这是没有任何记忆的递归版本
def edit_distance(str_a, str_b, len_a, len_b):
# bot strs consumed
if len_a == 0 and len_b == 0:
return 0
if len_a == 0:
return len_b
if len_b == 0:
return len_a
i_a = len_a-1
i_b = len_b-1
if str_a[i_a] == str_b[i_b]:
return edit_distance(str_a, str_b, i_a, i_b)
replace = edit_distance(str_a, str_b, i_a, i_b)
delete = edit_distance(str_a, str_b, i_a, len_b)
insert = edit_distance(str_a, str_b, len_a, i_b)
return 1+min(replace, delete, insert)
现在这里是memoized版本,我在其中缓存调用结果。
def edit_distance_memo(str_a, str_b, len_a, len_b, cache):
if cache[len_a][len_b] != -1:
return cache[len_a][len_b]
if len_a == 0 and len_b == 0:
cache[len_a][len_b] = 0
return 0
if len_a == 0:
cache[len_a][len_b] = len_b
return len_b
if len_b == 0:
cache[len_a][len_b] = len_a
return len_a
if cache[len_a][len_b] != -1:
return cache[len_a][len_b]
i_a = len_a-1
i_b = len_b-1
if str_a[i_a] == str_b[i_b]:
cache[len_a][len_b] = edit_distance_memo(str_a, str_b, i_a, i_b, cache)
return cache[len_a][len_b]
replace = edit_distance_memo(str_a, str_b, i_a, i_b, cache)
delete = edit_distance_memo(str_a, str_b, i_a, len_b, cache)
insert = edit_distance_memo(str_a, str_b, len_a, i_b, cache)
best_option = min(replace, delete, insert)
cache[len_a][len_b] = 1 + best_option
return cache[len_a][len_b]
这是调用代码:
from time import time
str1 = "Shakespeare"
str2 = "shake spear"
s1 = time()
print edit_distance(str1, str2, len(str1), len(str2)), "edit_distance"
print "diff---", time()-s1
rows = len("Shakespeare")+1
columns = len("shake spear")+1
cache = [[-1] * columns] * rows
st = time()
print edit_distance_memo("Shakespeare", "shake spear",len("Shakespeare"), len("shake spear"), cache)
由于某种原因,记忆版本似乎给出了错误的答案(7而不是3)。这有什么不对?
提前致谢。