我正在研究一种分治法,以确定列表中超过1/3的元素是否相同。 例如:[1,2,3,4]不,所有元素都是唯一的。 [1,1,2,4,5]是的,其中两个是相同的。
不进行分类,是否有分而治之的策略? 我在如何划分方面陷入困境...
C:\Users\Administrator>echo %JAVA_HOME%
C:\Program Files\Java\jdk1.8.0_221\
C:\Users\Administrator>echo %PATH%
C:\Program Files (x86)\Common Files\Oracle\Java\javapath;C:\Windows\system32;C:\
Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\
Program Files\Amazon\cfn-bootstrap\;C:\Program Files\Git\cmd;C:\Program Files\Ja
va\jdk1.8.0_221\\bin;C:\maven\bin;C:\maven\bin
非常感谢!
答案 0 :(得分:0)
您可以对快速排序进行更改:
如果只需要检查频率n / 3,则对于一般情况,时间复杂度是线性的。
查找最频繁元素的时间复杂度与快速排序相同。
答案 1 :(得分:0)
您可以使用二进制搜索树(BST)。 1.在每个节点上创建BST维护密钥计数 2.遍历树以使用分而治之找到最大的密钥数 3.测试最大计数> n / 3 使用BST中的数据,分而治之很简单,因为我们只是 必须确定左侧,当前或右侧分支的重复计数最高。
# A utility function to create a new BST node
class newNode:
# Constructor to create a new node
def __init__(self, data):
self.key = data
self.count = 1
self.left = None
self.right = None
# A utility function to insert a new node
# with given key in BST
def insert(node, key):
# If the tree is empty, return a new node
if node == None:
k = newNode(key)
return k
# If key already exists in BST, increment
# count and return
if key == node.key:
(node.count) += 1
return node
# Otherwise, recur down the tree
if key < node.key:
node.left = insert(node.left, key)
else:
node.right = insert(node.right, key)
# return the (unchanged) node pointer
return node
# Finds the node with the highest count in a binary search tree
def MaxCount(node):
if node == None:
return 0, None
else:
left = MaxCount(node.left)
right = MaxCount(node.right)
current = node.count, node
return max([left, right, current], key=lambda x: x[0])
def generateBST(a):
root = None
for x in a:
root = insert(root, x)
return root
# Driver Code
if __name__ == '__main__':
a = [1, 2, 3, 1, 1]
root = generateBST(a)
cnt, node = MaxCount(root)
if cnt >= (len(a) // 3):
print(node.key) # Prints 1
else:
print(None)
n / 3的一种非分而治之技术,它从https://www.geeksforgeeks.org/n3-repeated-number-array-o1-space/开始具有O(n)时间:
# Python 3 program to find if
# any element appears more than
# n/3.
import sys
def appearsNBy3(arr, n):
count1 = 0
count2 = 0
first = sys.maxsize
second = sys.maxsize
for i in range(0, n):
# if this element is
# previously seen,
# increment count1.
if (first == arr[i]):
count1 += 1
# if this element is
# previously seen,
# increment count2.
elif (second == arr[i]):
count2 += 1
elif (count1 == 0):
count1 += 1
first = arr[i]
elif (count2 == 0):
count2 += 1
second = arr[i]
# if current element is
# different from both
# the previously seen
# variables, decrement
# both the counts.
else:
count1 -= 1
count2 -= 1
count1 = 0
count2 = 0
# Again traverse the array
# and find the actual counts.
for i in range(0, n):
if (arr[i] == first):
count1 += 1
elif (arr[i] == second):
count2 += 1
if (count1 > n / 3):
return first
if (count2 > n / 3):
return second
return -1
# Driver code
arr = [1, 2, 3, 1, 1 ]
n = len(arr)
print(appearsNBy3(arr, n))
答案 2 :(得分:0)
这是我为乐趣而尝试的草稿。看起来分而治之可能会减少候选频率检查的次数,但我不确定(请参阅最后一个示例,其中对整个列表仅检查0)。
如果将列表分成三部分,则有效候选者可以拥有的最小频率是每个部分的1/3。这缩小了我们搜索其他部分的候选人的范围。令f(A, l, r)
代表在其父组中频率可能为1/3或更高的候选者。然后:
from math import ceil
def f(A, l, r):
length = r - l + 1
if length <= 3:
candidates = A[l:r+1]
print "l, r, candidates: %s, %s, %s\n" % (l, r, candidates)
return candidates
i = 0
j = 0
third = length // 3
lg_third = int(ceil(length / float(3)))
sm_third = lg_third // 3
if length % 3 == 1:
i, j = l + third, l + 2 * third
elif length % 3 == 2:
i, j = l + third, l + 2 * third + 1
else:
i, j = l + third - 1, l + 2 * third - 1
left_candidates = f(A, l, i)
middle_candidates = f(A, i + 1, j)
right_candidates = f(A, j + 1, r)
print "length: %s, sm_third: %s, lg_third: %s" % (length, sm_third, lg_third)
print "Candidate parts: %s, %s, %s" % (left_candidates, middle_candidates, right_candidates)
left_part = A[l:i+1]
middle_part = A[i+1:j+1]
right_part = A[j+1:r+1]
candidates = []
seen = []
for e in left_candidates:
if e in seen or e in candidates:
continue
seen.append(e)
count = left_part.count(e)
if count >= lg_third:
candidates.append(e)
else:
middle_part_count = middle_part.count(e)
print "Left: counting %s in middle: %s" % (e, middle_part_count)
if middle_part_count >= sm_third:
count = count + middle_part_count
right_part_count = right_part.count(e)
print "Left: counting %s in right: %s" % (e, right_part_count)
if right_part_count >= sm_third:
count = count + right_part_count
if count >= lg_third:
candidates.append(e)
seen = []
for e in middle_candidates:
if e in seen or e in candidates:
continue
seen.append(e)
count = middle_part.count(e)
if count >= lg_third:
candidates.append(e)
else:
left_part_count = left_part.count(e)
print "Middle: counting %s in left: %s" % (e, left_part_count)
if left_part_count >= sm_third:
count = count + left_part_count
right_part_count = right_part.count(e)
print "Middle: counting %s in right: %s" % (e, right_part_count)
if right_part_count >= sm_third:
count = count + right_part_count
if count >= lg_third:
candidates.append(e)
seen = []
for e in right_candidates:
if e in seen or e in candidates:
continue
seen.append(e)
count = right_part.count(e)
if count >= lg_third:
candidates.append(e)
else:
left_part_count = left_part.count(e)
print "Right: counting %s in left: %s" % (e, left_part_count)
if left_part_count >= sm_third:
count = count + left_part_count
middle_part_count = middle_part.count(e)
print "Right: counting %s in middle: %s" % (e, middle_part_count)
if middle_part_count >= sm_third:
count = count + middle_part_count
if count >= lg_third:
candidates.append(e)
print "l, r, candidates: %s, %s, %s\n" % (l, r, candidates)
return candidates
#A = [1, 1, 2, 4, 5]
#A = [1, 2, 3, 1, 2, 3, 1, 2, 3]
#A = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3]
A = [2, 2, 1, 3, 3, 1, 4, 4, 1]
#A = [x for x in range(1, 13)] + [0] * 6
print f(A, 0, len(A) - 1)