找到两个不同大小的排序数组的中位数

时间:2016-06-26 02:59:28

标签: c++ arrays algorithm

我正在解决问题find median!

基本上我有三个疑问:

  1. 对于 N == 1 且 M 为奇数的情况,我们难道不能直接计算 MO4(A[0], B[M/2], B[M/2 - 1], B[M/2 + 1]) 吗?
  2. 为什么数组 B 也要按数组 A 的 idxA 来缩短(即 B 同样去掉 idxA 个元素)?
  3. 此算法的时间复杂度为什么是 O(log M + log N)?
    // A C++ program to
    // find median of two sorted arrays of unequal sizes
    #include <bits/stdc++.h>
    using namespace std;
    
    // Median of two integers, i.e. their arithmetic mean.
    //
    // a, b: the two values (order does not matter).
    // Returns the mean as a float.
    //
    // The sum is formed in double rather than int so that a + b cannot
    // overflow (signed overflow is undefined behaviour) for large operands.
    float MO2(int a, int b)
    { return static_cast<float>( ( static_cast<double>(a) + b ) / 2.0 ); }
    
    // Median of three integers, i.e. the middle value once they are ordered.
    //
    // a, b, c: the three values (order does not matter).
    // Returns the middle value converted to float.
    //
    // The middle value is selected with min/max only; no sum is formed, so
    // large operands cannot overflow int.
    float MO3(int a, int b, int c)
    {
        const int lo = std::min(a, b);
        const int hi = std::max(a, b);
        // Clamp c into [lo, hi]: that is exactly the middle of the three.
        return static_cast<float>( std::max(lo, std::min(hi, c)) );
    }
    
    // Median of four integers: the mean of the two middle values.
    //
    // a, b, c, d: the four values (order does not matter).
    // Returns the mean of the two values left after discarding the largest
    // and the smallest, as a float.
    //
    // The sum is accumulated in long long so that adding four ints cannot
    // overflow (the original int arithmetic did for large operands).
    float MO4(int a, int b, int c, int d)
    {
        const int largest  = std::max( std::max(a, b), std::max(c, d) );
        const int smallest = std::min( std::min(a, b), std::min(c, d) );
        const long long middleSum =
            static_cast<long long>(a) + b + c + d - largest - smallest;
        return static_cast<float>( middleSum / 2.0 );
    }
    
    // Median of a single sorted array.
    //
    // arr: array of n ints, assumed sorted by the caller.
    // n:   number of elements in arr.
    // Returns the middle element for odd n, the mean of the two middle
    // elements for even n, and the sentinel -1 when there are no elements
    // (n <= 0).
    float medianSingle(int arr[], int n)
    {
        // Nothing to take a median of; also guards against negative lengths,
        // which would otherwise index outside the array.
        if (n <= 0)
            return -1;

        const int mid = n / 2;
        if (n % 2 == 0)
        {
            // Divide in floating point: integer division would drop the .5,
            // and summing in double avoids int overflow.
            return static_cast<float>(
                ( static_cast<double>(arr[mid]) + arr[mid - 1] ) / 2.0 );
        }
        return static_cast<float>( arr[mid] );
    }
    
    // Median of the combined contents of two sorted arrays.
    //
    // A, N: the smaller array and its length.
    // B, M: the larger array and its length.
    // Precondition (not checked here): N <= M.
    // Returns the median as a float; when A is empty the result is whatever
    // medianSingle(B, M) yields, which covers the "both empty" case.
    //
    // Arrays with 0, 1 or 2 elements in A are answered directly from a
    // handful of candidates around the middle of B. Otherwise the same
    // number of elements is dropped from A and from B and the function
    // recurses on the remainder.
    float findMedianUtil( int A[], int N, int B[], int M )
    {
        // Smaller array empty: only B matters.
        if (N == 0)
            return medianSingle(B, M);

        // Upper-middle index of B; every base case below looks around it.
        const int midB = M / 2;

        // ---- Smaller array has exactly one element ----
        if (N == 1)
        {
            // Case 1: one element on each side.
            if (M == 1)
                return MO2(A[0], B[0]);

            // Case 2: M is odd. B's middle element is averaged with the
            // median of A[0] and B's two neighbours of the middle.
            // e.g. A = {9}, B = {5, 8, 10, 20, 30}
            //      A = {1}, B = {5, 8, 10, 20, 30}
            if (M % 2 != 0)
            {
                const float aroundMiddle =
                    MO3(A[0], B[midB - 1], B[midB + 1]);
                return MO2(B[midB], aroundMiddle);
            }

            // Case 3: M is even. The answer is the median of B's two middle
            // elements and A[0].
            return MO3(B[midB], B[midB - 1], A[0]);
        }

        // ---- Smaller array has exactly two elements ----
        if (N == 2)
        {
            // Case 4: two elements on each side.
            if (M == 2)
                return MO4(A[0], A[1], B[0], B[1]);

            // Shared by cases 5 and 6: the smaller of A[1] and the element
            // just after B's (upper) middle.
            const int upperCandidate = std::min(A[1], B[midB + 1]);

            // Case 5: M is odd. Candidates are B's middle element, the
            // larger of A[0] and the element just before the middle, and
            // upperCandidate.
            if (M % 2 != 0)
            {
                const int lowerCandidate = std::max(A[0], B[midB - 1]);
                return MO3(B[midB], lowerCandidate, upperCandidate);
            }

            // Case 6: M is even. Candidates are B's two middle elements,
            // the larger of A[0] and the element just before the first
            // middle, and upperCandidate.
            const int lowerCandidate = std::max(A[0], B[midB - 2]);
            return MO4(B[midB], B[midB - 1], lowerCandidate, upperCandidate);
        }

        // ---- General case: shrink both arrays by the same amount ----
        const int cut    = (N - 1) / 2;   // index of A's (lower) middle
        const int pivotB = (M - 1) / 2;   // index of B's (lower) middle
        const int keepA  = N / 2 + 1;     // elements of A kept either way
        const int keepB  = M - cut;       // elements of B kept either way

        // A's middle <= B's middle: the median lies in A[cut...] and
        // B[...pivotB], so drop `cut` elements from A's front and B's back.
        if (A[cut] <= B[pivotB])
            return findMedianUtil(A + cut, keepA, B, keepB);

        // Otherwise the median lies in A[...cut] and B[pivotB...], so drop
        // `cut` elements from A's back and B's front.
        return findMedianUtil(A, keepA, B + cut, keepB);
    }
    
    // Public entry point: median of two sorted arrays of possibly
    // different lengths.
    //
    // A, N: first array and its length.
    // B, M: second array and its length.
    // Returns whatever findMedianUtil() returns. The only job here is to
    // order the arguments so that the shorter array is passed first.
    float findMedian( int A[], int N, int B[], int M )
    {
        const bool firstIsLonger = N > M;
        return firstIsLonger ? findMedianUtil( B, M, A, N )
                             : findMedianUtil( A, N, B, M );
    }
    
    // Demo driver: prints the median of two small hard-coded sorted arrays.
    int main()
    {
        int A[] = {900};
        int B[] = {5, 8, 10, 20};

        // Element counts derived from the array types.
        const int lenA = sizeof(A) / sizeof(A[0]);
        const int lenB = sizeof(B) / sizeof(B[0]);

        const float median = findMedian( A, lenA, B, lenB );
        printf("%f", median);
        return 0;
    }
    

1 个答案:

答案 0 :(得分:0)

  1. 这两个表达式是等价的,因为 B[M/2] 恰好是 B 的中位数(即利用 B[M/2-1] <= B[M/2] <= B[M/2+1]):

    using a:=A[0], b:=B[M/2], bm:=B[M/2-1], bp:=B[M/2+1]
    MO4(a, b, bm, bp)
    = 0.5 * (a + b + bm + bp - min(a, b, bm, bp) - max(a, b, bm, bp))
    = 0.5 * (b + (a + bm + bp - min(a, bm, bp) - max(a, bm, bp)))
    = MO2(b, MO3(a, bm, bp))
    
  2. 这遵循分而治之的范式:把问题划分为子问题。在这里,要从两个数组中各去掉相同数量的元素,这样合并后的中位数保持不变。不能简单地把两个数组都减半,因为它们的大小不同(那样中位数就不再位于剩余范围的中间,从而使算法返回错误的值)。关于这一点,相关问题(related question)中有更详细的回答。

  3. 不失一般性,假设 N <= M:

    T(N, M) :=
    | (N < 2 && M < 2)  O(1)
    | (N < 2 && M >= 2) T(M) = O(log M)
    | (N == 2)          T(M) = O(log M)
    | (otherwise)       1 + T(N - N/2, M - N/2)
    

    其中 T(N, M) 是 findMedianUtil 的运行时间,T(M) 是在单个数组中查找中位数的运行时间。在一般情况下,递归大约进行 log N 次才会到达某个基本情况,而基本情况至多还需要 O(log M) 的时间,因此总时间为 O(log N + log M)。

    顺便一提:O(log M + log N) 只是 O(log max(N, M)) 的一种随意写法。对于 N <= M,它等于 O(log M),因为此时 O(log N) 包含于 O(log M)。