有人可以向我解释一下,当我删除包含'#duplicate'?
的行时,为什么我会得到不同的结果?import re
def nysiis(term: str) -> str:
"""
returns New York State Identification and Intelligence Algorithm (NYSIIS) code for given term
"""
if not len(term):
return ''
else:
term = term.upper()
table = {
r'\W+': '', # remove whitespace and non-word characters
r'^MAC': 'MCC',
r'^KN': 'NN',
r'K': 'C',
r'PH|PF': 'FF',
r'SCH': 'SSS',
r'(EE|IE)$': 'Y',
r'(DT|ND|NT|RD|RT)$': 'D',
# From now on first letter must no longer change.
r'(?<!^)EV': 'AF',
r'(?<!^)[AEIOU]': 'A',
r'(?<!^)Q': 'G',
r'(?<!^)Z': 'S',
r'(?<!^)(?:M|KN)': 'N',
r'(?<!^)([^AEIOUY])H': r'\1',
r'(?<!^)(.)H[^AEIOUY]': r'\1',
r'(?<!^)([AEIOUY])W': r'\1',
r'AY$': r'Y',
r'S$': r'',
r'(\w)\1+': r'\1', # original
r'A+$': r'' # original
}
for k, v in table.items():
term = re.sub(k, v, term)
table = { # duplicate
r'(\w)\1+': r'\1', # duplicate
r'A+$': r'' # duplicate
} # duplicate
for k, v in table.items(): # duplicate
term = re.sub(k, v, term) # duplicate
return term
if __name__ == '__main__':
names = [
'Bishop', 'Carlson', 'Carr', 'Chapman', 'Franklin',
'Greene', 'Harper', 'Jacobs', 'Larson', 'Lawrence',
'Lawson', 'Louis, XVI', 'Lynch', 'Mackenzie', 'Matthews',
'McCormack', 'McDaniel', 'McDonald', 'Mclaughlin', 'Morrison',
"O'Banion", "O'Brien", 'Richards', 'Silva', 'Watkins',
'Wheeler', 'Willis', 'brown, sr', 'browne, III', 'browne, IV',
'knight', 'mitchell', "o'daniel",
]
for name in names:
print('%15s: %s' % (name, nysiis(name)))
答案 0 :(得分:1)
你不想在你的替换中使用dict:迭代的顺序与你列出的顺序不一样。如果你将你的dicts更改为对的列表,那么它就像你期望的那样工作
table = [
(r'\W+', ''),
#...
]
for k, v in table:
...