在这段代码中,我试图在字符串列表中找出出现次数最多的名字,并让程序以 O(n log n) 的时间复杂度运行。我知道使用字典可以在 O(n) 内完成。有什么明显的方法可以改进这段代码吗?
def mostCommonName(L):
    """Return the most frequent name(s) in L using a sort-and-scan pass.

    Args:
        L: A sequence of mutually comparable, hashable names (e.g. strings).

    Returns:
        None if L is empty; the single most frequent name when there is a
        unique winner; otherwise a set containing every name tied for the
        highest count.
    """
    if not L:
        return None
    # Sorting puts equal names next to each other; this is the O(n log n) step.
    newL = sorted(L)
    maxCount = 0
    maxName = set()
    currName = newL[0]
    currCount = 0
    for name in newL:
        if name == currName:
            currCount += 1
        else:
            currName = name
            currCount = 1
        # Updating on every element guarantees the final run is considered
        # too, and a strictly larger count discards names that were only
        # tied at a smaller count.
        if currCount > maxCount:
            maxCount = currCount
            maxName = {currName}
        elif currCount == maxCount:
            maxName.add(currName)
    if len(maxName) == 1:
        return maxName.pop()
    return maxName
答案 0 :(得分:1)
您可以改用groupby:
from operator import itemgetter
from itertools import groupby
def most_common_name(L):
    """Return the item that occurs most often in L.

    The items are sorted so equal values are adjacent, each run is measured,
    and the key of the longest run is returned. On a tie the first run in
    sorted order wins. An empty L makes ``max`` raise ValueError.
    """
    ordered = sorted(L)
    run_sizes = ((key, len(list(run))) for key, run in groupby(ordered))
    winner, _size = max(run_sizes, key=itemgetter(1))
    return winner
# Demo: 'dan' is the only name listed twice, so this prints "dan".
result = most_common_name(
    ['dan', 'david', 'dan', 'jen', 'james'],
)
print(result)
输出
dan
或更容易理解的替代方法:
from itertools import groupby
def len_group(pair):
    """Return how many items are in the group part (index 1) of pair.

    The group is consumed by iterating over it, so it may be a one-shot
    iterator such as the ones produced by ``itertools.groupby``.
    """
    size = 0
    for _ in pair[1]:
        size += 1
    return size
def most_common_name(l):
    """Return the item that occurs most often in l.

    Sorts the items, groups adjacent equal values, and picks the group that
    ``len_group`` reports as largest. The key function is evaluated on each
    group as ``max`` reaches it, so every group is measured before
    ``groupby`` advances to the next one.
    """
    groups = groupby(sorted(l))
    largest = max(groups, key=len_group)
    name, _members = largest
    return name
# Demo: 'dan' is the only name listed twice in the sample input.
result = most_common_name(
    ['dan', 'david', 'dan', 'jen', 'james'],
)
print(result)
这两种方案的思路相同:先排序,再用 groupby 将连续的相同值分组,然后找出最大的组并返回该组的键。这些解决方案的时间复杂度是 O(n log n);如果您想要 O(n) 的解决方案,可以使用 Counter,如下所示:
from operator import itemgetter
from collections import Counter
def most_common_name(l):
    """Return the item that occurs most often in l, counted with Counter.

    On a tie the item that was seen first wins. An empty l makes ``max``
    raise ValueError.
    """
    tally = Counter(l)
    # Iterating the Counter visits keys in first-seen order, the same order
    # in which items() would yield them, so tie-breaking is unchanged.
    return max(tally, key=tally.get)
# Demo: 'dan' is the only name listed twice, so this prints "dan".
result = most_common_name(
    ['dan', 'david', 'dan', 'jen', 'james'],
)
print(result)
输出
dan
答案 1 :(得分:1)
稍微简洁一点,同时保持相同的算法:
def mostCommonName(L):
    """Return the most frequent name in L using a sort-and-scan pass.

    Args:
        L: A sequence of mutually comparable names (e.g. strings).

    Returns:
        None if L is empty; otherwise the name with the highest count. When
        several names tie, the one that sorts first is returned.
    """
    if not L:
        return None
    # Sorting puts equal names next to each other so runs can be counted.
    newL = sorted(L)
    occurrences, current_name = 0, newL[0]
    best_occurrences, best_name = 0, newL[0]
    for name in newL:
        if name == current_name:
            occurrences += 1
        else:
            occurrences, current_name = 1, name
        # Compare on every element so the last run is not skipped; the
        # strict '>' keeps the earliest name on ties.
        if occurrences > best_occurrences:
            best_occurrences, best_name = occurrences, current_name
    return best_name
或者:
from collections import Counter
def mostCommonName(L):
    """Return the most frequent name in L, or None if L is empty.

    Counting is delegated to ``collections.Counter``. Returning None for
    empty input matches the sort-based ``mostCommonName`` this is offered as
    an alternative to, instead of failing with IndexError.
    """
    ranked = Counter(L).most_common(1)
    if not ranked:
        return None
    return ranked[0][0]