我有 2 个嵌套列表(即由列表组成的列表:list_1 和 list_2),需要检查它们的相似性。我想跟踪最高匹配数,并获取具有最高匹配数的 list_1 和 list_2 的索引。
注意:最高匹配数可以大于1,因为最高匹配数可以再次出现(重复)。
我尝试使用 max 函数找到最高的值,但是它只返回一个结果,没有给出其他同样达到最高匹配数的重复值。
# NOTE(review): the block indentation of this snippet appears to have been lost
# in the paste, so the exact nesting of the statements below cannot be
# recovered with certainty -- confirm against the original before running.
# Module-level accumulator for the intersections found; it is appended to and
# cleared (in place) inside similarities().
list_of_similarities = []
# Print the values shared between list_1 and list_2 entries, followed by the
# max() of the collected intersections. Reads the globals list_1 and list_2.
def similarities():
print("SIMILARITIES")
# Index-based loops: the positions i and j are included in the printed report.
for i in range(len(list_1)):
for j in range(len(list_2)):
# A non-empty intersection is truthy, i.e. the two sub-lists share a value.
if set(list_2[j]) & set(list_1[i]):
matchingValues = set(list_2[j]) & set(list_1[i])
print('list_1[{}], list_2[{}]'.format(i, j), 'matching value:',set(list_2[j]) & set(list_1[i]))
list_of_similarities.append(matchingValues)
print("")
print("")
print("The maximum matches found are:")
# NOTE: max() orders sets by the subset/superset relation, not by size, and it
# returns a single set only -- ties are not reported.
print(max(list_of_similarities))
# Drop the intersections collected so far from the shared accumulator.
list_of_similarities.clear()
# Sample inputs read as globals by similarities().
list_1 = [['a','b','c'],['d','e','g'],['l','r'],['z']]
list_2 = [['b','c'], ['l','e','a'], ['f'], ['z','r'], ['x', 'b', 'c']]
当前结果
# list_1[0], list_2[0] matching value: {'b', 'c'}
# list_1[0], list_2[1] matching value: {'a'}
# list_1[0], list_2[4] matching value: {'b', 'c'}
# The maximum matches found are:
# {'b', 'c'}
# list_1[1], list_2[1] matching value: {'e'}
# The maximum matches found are:
# {'e'}
# list_1[2], list_2[1] matching value: {'l'}
# The maximum matches found are:
# {'l'}
# list_1[2], list_2[3] matching value: {'r'}
# The maximum matches found are:
# {'r'}
# list_1[3], list_2[3] matching value: {'z'}
# The maximum matches found are:
# {'z'}
预期结果
# list_1[0], list_2[0] matching value: {'b', 'c'}
# list_1[0], list_2[1] matching value: {'a'}
# list_1[0], list_2[4] matching value: {'b', 'c'}
# The maximum matches found are:
# {'b', 'c'}
# The list_1 and list_2 indexes for highest matches are:
# [0,[0, 4]]
# list_1[1], list_2[1] matching value: {'e'}
# The maximum matches found are:
# {'e'}
# The list_1 and list_2 indexes for highest matches are:
# [1,[1]]
# list_1[2], list_2[1] matching value: {'l'}
# The maximum matches found are:
# {'l'}
# The list_1 and list_2 indexes for highest matches are:
# [2,[1]]
# list_1[2], list_2[3] matching value: {'r'}
# The maximum matches found are:
# {'r'}
# The list_1 and list_2 indexes for highest matches are:
# [2,[3]]
# list_1[3], list_2[3] matching value: {'z'}
# The maximum matches found are:
# {'z'}
# The list_1 and list_2 indexes for highest matches are:
# [3,[3]]
答案 0 :(得分:1)
# Module-level scratch list holding the intersections found for the list_1
# entry currently being processed; it is emptied after each entry is reported.
list_of_similarities = []


def similarities():
    """Print the overlaps between every ``list_1`` and ``list_2`` entry.

    Reads the module-level ``list_1`` and ``list_2`` (lists of lists). For each
    ``list_1`` entry it prints every non-empty intersection with a ``list_2``
    entry, then the largest intersection found and the ``list_2`` indexes
    whose entries contain all values of that largest intersection.

    A ``list_1`` entry without any overlap produces no report; calling
    ``max()`` on an empty list would otherwise raise ``ValueError``.

    Returns:
        None. All results are written to standard output.
    """
    print("SIMILARITIES")
    for i, entry in enumerate(list_1):
        entry_values = set(entry)
        for j, candidate in enumerate(list_2):
            matching_values = set(candidate) & entry_values
            if matching_values:
                print('list_1[{}], list_2[{}]'.format(i, j),
                      'matching value:', matching_values)
                list_of_similarities.append(matching_values)

        if not list_of_similarities:
            # Nothing in list_2 shares a value with this entry.
            continue

        # Sets are only partially ordered (``>`` means "proper superset"), so
        # the size has to be compared explicitly to find the largest match.
        val = max(list_of_similarities, key=len)
        print("The maximum matches found are:")
        print(val)

        # Keep every list_2 index whose entry contains all elements in val.
        idx_list2 = [idx for idx, item in enumerate(list_2)
                     if val.issubset(item)]
        print("The list_1 and list_2 indexes for highest matches are:")
        print([i, idx_list2])
        print("")
        list_of_similarities.clear()


list_1 = [['a', 'b', 'c'], ['d', 'e', 'g'], ['l', 'r'], ['z']]
list_2 = [['b', 'c'], ['l', 'e', 'a'], ['f'], ['z', 'r'], ['x', 'b', 'c']]
similarities()
输出:
SIMILARITIES
list_1[0], list_2[0] matching value: {'c', 'b'}
list_1[0], list_2[1] matching value: {'a'}
list_1[0], list_2[4] matching value: {'c', 'b'}
The maximum matches found are:
{'c', 'b'}
The list_1 and list_2 indexes for highest matches are:
[0, [0, 4]]
list_1[1], list_2[1] matching value: {'e'}
The maximum matches found are:
{'e'}
The list_1 and list_2 indexes for highest matches are:
[1, [1]]
list_1[2], list_2[1] matching value: {'l'}
list_1[2], list_2[3] matching value: {'r'}
The maximum matches found are:
{'l'}
The list_1 and list_2 indexes for highest matches are:
[2, [1]]
list_1[3], list_2[3] matching value: {'z'}
The maximum matches found are:
{'z'}
The list_1 and list_2 indexes for highest matches are:
[3, [3]]
答案 1 :(得分:0)
Zen of Python 断言“扁平比嵌套好”,因此这种解决问题的方法没有使用显式的嵌套循环。话虽如此,推导式(comprehension)中仍然包含很多循环,因此它可能比使用嵌套的 for 循环要慢。
它使用 itertools.product 生成两个列表中元素的所有配对(笛卡尔积)。
>>> pairs = itertools.product(['a', 'b'], [1, 2])
>>> for p, q in pairs:print(p, q)
...
a 1
a 2
b 1
b 2
和itertools.groupby,以根据第一个列表中的元素将对分组:
>>> pairs = itertools.product(['a', 'b'], [1, 2])
>>> for k, g in itertools.groupby(pairs, key=lambda x: x[0]):
... print(k, list(g))
...
a [('a', 1), ('a', 2)]
b [('b', 1), ('b', 2)]
在一组(冻结的)集合上调用 max 时,代码通过 key 参数指定 max 必须按集合的长度进行比较。这是因为默认情况下,集合的“大于”运算判断的是该集合是否为另一个集合的真超集,而不是它是否更长:
>>> set([1, 2]) > set([3, 4, 5])
False
>>> max([set([1, 2]), set([1, 2, 3]), set([4, 5, 6, 7, 8, 9])])
{1, 2, 3}
>>> max([set([1, 2]), set([1, 2, 3]), set([4, 5, 6, 7, 8, 9])], key=len)
{4, 5, 6, 7, 8, 9}
如果存在多个匹配项,此方法会正确报告所有“最长”匹配项。匹配项存储为冻结集,这样,如果匹配项不止一次,则可以很容易地对它们进行重复数据删除。
import itertools
def similarities():
    """Report, for every ``list_1`` entry, its largest overlaps with ``list_2``.

    Reads the module-level ``list_1`` and ``list_2`` (lists of lists of
    hashable values). For each ``list_1`` entry it prints every non-empty
    intersection with a ``list_2`` entry, the distinct intersections of
    maximal size, and the ``list_2`` indexes at which they occur.

    A ``list_1`` entry that shares no value with any ``list_2`` entry is
    skipped, so an empty intersection is never reported as a "highest match".

    Returns:
        None. All results are written to standard output.
    """
    # Create format strings.
    matched_fmt = 'list_1[{}], list_2[{}] matching value: {}'
    index_fmt = '[{}, {}]'
    print("SIMILARITIES")
    # Get the cartesian product of the two lists, carrying each entry's index.
    product = itertools.product(enumerate(list_1), enumerate(list_2))
    # Group by the list_1 *index* rather than by the entry itself: groupby
    # merges consecutive equal keys, so two equal neighbouring entries would
    # otherwise collapse into one group and yield wrong list_2 indexes.
    for i, group in itertools.groupby(product, key=lambda pair: pair[0][0]):
        # List all matches and the index of the second list element.
        matches = [(j, frozenset(p) & frozenset(q))
                   for (_, p), (j, q) in group]
        found_matches = [(j, match) for j, match in matches if match]
        if not found_matches:
            # No overlap at all for this list_1 entry.
            continue
        # Find the longest matches; size must be compared explicitly because
        # sets are ordered by the subset relation, not by length.
        longest = max(len(match) for _, match in found_matches)
        longest_matches = [(j, match) for j, match in found_matches
                           if len(match) == longest]
        # frozensets are hashable, so equal matches de-duplicate in a set.
        unique_matches = {match for _, match in longest_matches}
        # Report.
        found_lines = [matched_fmt.format(i, j, match)
                       for j, match in found_matches]
        print('\n'.join(found_lines))
        print("The maximum matches found are:")
        print(' '.join(str(match) for match in unique_matches))
        print('The list_1 and list_2 indexes for the highest matches are:')
        print(index_fmt.format(i, [j for j, _ in longest_matches]))
        print()
该函数产生以下输出:
SIMILARITIES
list_1[0], list_2[0] matching value: frozenset({'c', 'b'})
list_1[0], list_2[1] matching value: frozenset({'a'})
list_1[0], list_2[4] matching value: frozenset({'c', 'b'})
The maximum matches found are:
frozenset({'c', 'b'})
The list_1 and list_2 indexes for the highest matches are:
[0, [0, 4]]
list_1[1], list_2[1] matching value: frozenset({'e'})
The maximum matches found are:
frozenset({'e'})
The list_1 and list_2 indexes for the highest matches are:
[1, [1]]
list_1[2], list_2[1] matching value: frozenset({'l'})
list_1[2], list_2[3] matching value: frozenset({'r'})
The maximum matches found are:
frozenset({'r'}) frozenset({'l'})
The list_1 and list_2 indexes for the highest matches are:
[2, [1, 3]]
list_1[3], list_2[3] matching value: frozenset({'z'})
The maximum matches found are:
frozenset({'z'})
The list_1 and list_2 indexes for the highest matches are:
[3, [3]]