Question

我正在研究一种分治算法，用于判断列表中是否有超过 1/3 的元素相同。例如：[1,2,3,4] 不满足，因为所有元素都互不相同；[1,1,2,4,5] 满足，因为其中有两个元素相同（2 > 5/3）。

在不进行排序的前提下，是否存在分而治之的策略？我卡在了如何划分这一步……

C:\Users\Administrator>echo %JAVA_HOME%
C:\Program Files\Java\jdk1.8.0_221\

C:\Users\Administrator>echo %PATH%
C:\Program Files (x86)\Common Files\Oracle\Java\javapath;C:\Windows\system32;C:\
Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\
Program Files\Amazon\cfn-bootstrap\;C:\Program Files\Git\cmd;C:\Program Files\Ja
va\jdk1.8.0_221\\bin;C:\maven\bin;C:\maven\bin

非常感谢！

Answer 1

您可以对快速排序进行更改：

选择一个枢轴
将数组划分为小于枢轴和大于枢轴的两个子数组
找到每边最频繁的元素
返回递归调用中两个元素中最频繁的元素（及其频率计数）
如果只需要检查频率是否超过 n / 3，则只需在子数组的大小大于 n / 3 时才继续检查。

如果只需要检查频率是否超过 n / 3，则平均情况下的时间复杂度是线性的。

查找最频繁元素的时间复杂度与快速排序相同。

Answer 2

您可以使用二叉搜索树（BST）：1. 构建 BST，并在每个节点上维护该键的出现次数；2. 以分而治之的方式遍历树，找出出现次数最多的键；3. 检查最大次数是否 > n / 3。数据存入 BST 之后，分而治之就很简单，因为我们只需确定左子树、当前节点和右子树三者中哪个的重复次数最高。

# BST node that stores a key together with its occurrence count
class newNode:
    """Binary-search-tree node holding a key and how often it was inserted."""

    def __init__(self, data):
        # A freshly created node has no children and stands for exactly
        # one occurrence of its key.
        self.left = self.right = None
        self.count = 1
        self.key = data

# Insert a key into the BST rooted at `node`, counting duplicates
def insert(node, key):
    """Insert *key* into the BST rooted at *node* and return the root.

    If the key is already present its node's ``count`` is incremented;
    otherwise a new leaf is attached.  The walk is iterative so that a
    degenerate (e.g. sorted-input) tree deeper than the interpreter's
    recursion limit does not raise ``RecursionError``.

    Args:
        node: Root of the tree, or ``None`` for an empty tree.
        key: Value to insert; must be comparable with the existing keys.

    Returns:
        The root of the tree (a new node when the tree was empty,
        otherwise the unchanged *node*).
    """
    # Empty tree: the new node becomes the root.
    if node is None:
        return newNode(key)

    current = node
    while True:
        # Key already stored: just bump its counter.
        if key == current.key:
            current.count += 1
            return node

        if key < current.key:
            if current.left is None:
                current.left = newNode(key)
                return node
            current = current.left
        else:
            if current.right is None:
                current.right = newNode(key)
                return node
            current = current.right

# Finds the node with the highest count in a binary search tree
def MaxCount(node):
    """Return ``(count, node)`` for the node with the largest ``count``.

    The tree is visited in post-order (left subtree, right subtree, then the
    node itself) and a node only replaces the current best when its count is
    strictly larger, so ties resolve to the earliest node in that order.
    The traversal uses an explicit stack instead of recursion so that very
    deep (degenerate) trees do not hit the interpreter's recursion limit.

    Args:
        node: Root of the tree, or ``None`` for an empty tree.

    Returns:
        A ``(count, node)`` tuple; ``(0, None)`` when no node has a
        positive count (in particular for an empty tree).
    """
    best = (0, None)
    # Each stack entry is (node, children_already_scheduled).
    stack = [(node, False)]
    while stack:
        current, expanded = stack.pop()
        if current is None:
            continue
        if expanded:
            # Both subtrees have been handled; now consider the node itself.
            if current.count > best[0]:
                best = (current.count, current)
        else:
            # Pushed in reverse so that the left subtree is popped first,
            # then the right subtree, then the node itself.
            stack.append((current, True))
            stack.append((current.right, False))
            stack.append((current.left, False))
    return best

def generateBST(a):
    """Build a counting BST from the values in *a* and return its root.

    Values are inserted one by one in iteration order; an empty input
    yields ``None``.
    """
    tree = None
    for value in a:
        tree = insert(tree, value)
    return tree

# Driver Code
if __name__ == '__main__':
    a = [1, 2, 3, 1, 1]
    root = generateBST(a)
    cnt, node = MaxCount(root)
    # "More than one third" is a strict inequality.  Comparing in integers
    # (3 * cnt > n) avoids the floor in n // 3, which would wrongly accept
    # e.g. a count of 1 in a 4-element list, and it also keeps an empty
    # list (cnt == 0, node is None) out of the node.key branch.
    if 3 * cnt > len(a):
        print(node.key)  # Prints 1
    else:
        print(None)

下面是一种不使用分而治之的 n / 3 解法，时间复杂度为 O（n），出自 https://www.geeksforgeeks.org/n3-repeated-number-array-o1-space/ ：

# Python 3 program to find if  
# any element appears more than 
# n/3. 
import sys 

def appearsNBy3(arr, n):
    """Return an element occurring more than ``n / 3`` times in ``arr[:n]``.

    Two passes are made over the first *n* items: a voting pass that keeps
    at most two candidate values, then a counting pass that measures how
    often each surviving candidate really occurs.

    Args:
        arr: Indexable sequence of values.
        n: Number of leading elements of *arr* to examine.

    Returns:
        The first candidate whose true count exceeds ``n / 3``; otherwise
        the second candidate if its count does; otherwise ``-1``.
    """
    # Both candidate slots start out holding a sentinel with zero votes.
    cand_a = sys.maxsize
    cand_b = sys.maxsize
    votes_a = votes_b = 0

    # Pass 1: voting.
    for idx in range(n):
        value = arr[idx]
        if cand_a == value:
            votes_a += 1
        elif cand_b == value:
            votes_b += 1
        elif votes_a == 0:
            # Free slot: adopt the current value as the first candidate.
            cand_a, votes_a = value, 1
        elif votes_b == 0:
            # Free slot: adopt the current value as the second candidate.
            cand_b, votes_b = value, 1
        else:
            # A third distinct value cancels one vote from each candidate.
            votes_a -= 1
            votes_b -= 1

    # Pass 2: the votes are only a filter, so recount real occurrences.
    votes_a = votes_b = 0
    for idx in range(n):
        value = arr[idx]
        if value == cand_a:
            votes_a += 1
        elif value == cand_b:
            votes_b += 1

    threshold = n / 3
    if votes_a > threshold:
        return cand_a
    if votes_b > threshold:
        return cand_b
    return -1

# Driver code: run the check on a small sample list.
arr = [1, 2, 3, 1, 1 ] 
n = len(arr)  
# Prints the value returned by appearsNBy3 (1 for this sample, since 1
# occurs 3 times and 3 > 5 / 3).
print(appearsNBy3(arr, n))

Answer 3

这是我为乐趣而尝试的草稿。看起来分而治之可能会减少候选频率检查的次数，但我不确定（请参阅最后一个示例，其中对整个列表仅检查0）。

如果将列表分成三部分，则有效候选元素可能具有的最小频率是每个部分的 1/3。这缩小了我们需要到其他部分中去搜索的候选元素的范围。令 f(A, l, r) 表示在其父区间中频率可能达到 1/3 或更高的候选元素。于是：

from math import ceil

def _promote_candidates(label, own_candidates, own_part, other_parts,
                        lg_third, sm_third, candidates):
    """Append to *candidates* every value of one part that reaches lg_third.

    A value's count starts with its occurrences in its own part.  If that
    alone is below *lg_third*, the occurrences in each other part are added,
    but only when that part's count is at least *sm_third*.  *other_parts*
    is a sequence of ``(name, sublist)`` pairs visited in order; *label* and
    *name* are used only for the trace output.
    """
    seen = []
    for e in own_candidates:
        # Skip values already examined for this part or already accepted.
        if e in seen or e in candidates:
            continue
        seen.append(e)
        count = own_part.count(e)
        if count < lg_third:
            for other_name, other_part in other_parts:
                other_count = other_part.count(e)
                print("%s: counting %s in %s: %s"
                      % (label, e, other_name, other_count))
                if other_count >= sm_third:
                    count = count + other_count
        if count >= lg_third:
            candidates.append(e)


def f(A, l, r):
    """Return candidate values for the inclusive slice ``A[l..r]``.

    Slices of length 3 or less return all of their elements.  Longer slices
    are split into three consecutive parts, each part is processed
    recursively, and a value returned by a part is kept when its combined
    count (see ``_promote_candidates``) reaches ``ceil(length / 3)``.
    Progress is traced on standard output.

    Args:
        A: The list being examined.
        l: Index of the first element of the slice (inclusive).
        r: Index of the last element of the slice (inclusive).

    Returns:
        A list of candidate values, without duplicates for slices longer
        than 3 elements.
    """
    length = r - l + 1

    if length <= 3:
        candidates = A[l:r+1]
        print("l, r, candidates: %s, %s, %s\n" % (l, r, candidates))
        return candidates

    third = length // 3
    lg_third = int(ceil(length / float(3)))
    sm_third = lg_third // 3

    # i and j are the last indices of the left and middle parts; any
    # remainder elements are given to the left part first, then the middle.
    if length % 3 == 1:
        i, j = l + third, l + 2 * third
    elif length % 3 == 2:
        i, j = l + third, l + 2 * third + 1
    else:
        i, j = l + third - 1, l + 2 * third - 1

    left_candidates = f(A, l, i)
    middle_candidates = f(A, i + 1, j)
    right_candidates = f(A, j + 1, r)
    print("length: %s, sm_third: %s, lg_third: %s"
          % (length, sm_third, lg_third))
    print("Candidate parts: %s, %s, %s"
          % (left_candidates, middle_candidates, right_candidates))
    left_part = A[l:i+1]
    middle_part = A[i+1:j+1]
    right_part = A[j+1:r+1]
    candidates = []

    _promote_candidates(
        "Left", left_candidates, left_part,
        [("middle", middle_part), ("right", right_part)],
        lg_third, sm_third, candidates)
    _promote_candidates(
        "Middle", middle_candidates, middle_part,
        [("left", left_part), ("right", right_part)],
        lg_third, sm_third, candidates)
    _promote_candidates(
        "Right", right_candidates, right_part,
        [("left", left_part), ("middle", middle_part)],
        lg_third, sm_third, candidates)

    print("l, r, candidates: %s, %s, %s\n" % (l, r, candidates))
    return candidates


# Sample inputs; exactly one assignment to A is active at a time.
#A = [1, 1, 2, 4, 5]
#A = [1, 2, 3, 1, 2, 3, 1, 2, 3]
#A = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3]
A = [2, 2, 1, 3, 3, 1, 4, 4, 1]
#A = [x for x in range(1, 13)] + [0] * 6
# Call form of print: valid on both Python 2 and Python 3.
print(f(A, 0, len(A) - 1))

分而治之策略确定列表中相同元素是否超过1/3

3 个答案: