Question

我正在研究“求两个有序数组的中位数”（find median）这个问题。

基本上我有三个疑问：

1. 对于 N == 1 且 M 为奇数的情况，为什么不能直接计算 MO4(A[0], B[M/2], B[M/2 - 1], B[M/2 + 1])？
2. 为什么要把数组 A 和 B 都缩短 idxA 个元素？
3. 为什么此算法的时间复杂度是 O(log M + log N)？

// A C++ program to
// find median of two sorted arrays of unequal sizes
#include <bits/stdc++.h>
using namespace std;

// Returns the median of two integers, i.e. their arithmetic mean.
// The sum is formed in double rather than int so that a + b cannot
// overflow for operands near INT_MAX / INT_MIN.
float MO2(int a, int b)
{
    return static_cast<float>((static_cast<double>(a) + b) / 2.0);
}

// Returns the median (middle value) of three integers.
// The value is selected with comparisons only; the earlier
// "sum minus max minus min" form could overflow int for large inputs.
float MO3(int a, int b, int c)
{
    const int lo = std::min(a, b);
    const int hi = std::max(a, b);
    // Clamping c into [lo, hi] yields the middle of the three values.
    return static_cast<float>(std::max(lo, std::min(hi, c)));
}

// Returns the median of four integers: the mean of the two values that
// remain after discarding the largest and the smallest.
float MO4(int a, int b, int c, int d)
{
    const int largest  = std::max(std::max(a, b), std::max(c, d));
    const int smallest = std::min(std::min(a, b), std::min(c, d));

    // Accumulate in 64 bits: the sum of four ints can exceed the int range.
    const long long middleSum =
        static_cast<long long>(a) + b + c + d - largest - smallest;

    return static_cast<float>(middleSum / 2.0);
}

// Returns the median of a single array of n elements (assumed sorted,
// as the rest of this program requires), or -1 if the array is empty.
float medianSingle(int arr[], int n)
{
    // No elements (or a nonsensical negative count): report the -1 sentinel.
    if (n <= 0)
        return -1;

    if (n % 2 == 0)
    {
        // Average the two middle elements in floating point. Integer
        // division would drop the .5, and the int sum could overflow.
        const double middlePair = static_cast<double>(arr[n/2 - 1]) + arr[n/2];
        return static_cast<float>(middlePair / 2.0);
    }

    return static_cast<float>(arr[n/2]);
}

// Recursive core of the two-array median search.
// Precondition: N <= M (the smaller array is passed first).
// Returns -1 if both arrays are empty.
float findMedianUtil( int A[], int N, int B[], int M )
{
    // Smaller array is empty: the answer is the median of B alone.
    if (N == 0)
        return medianSingle(B, M);

    const int  midB = M / 2;
    const bool oddM = (M & 1) != 0;

    // Base cases: the smaller array holds exactly one element.
    if (N == 1)
    {
        // Case 1: one element on each side.
        if (M == 1)
            return MO2(A[0], B[0]);

        // Case 2: M is odd. Combine B's middle element with the median
        // of A[0] and B's two neighbours of the middle, e.g.
        //   A = {9}, B = {5, 8, 10, 20, 30}
        //   A = {1}, B = {5, 8, 10, 20, 30}
        // Note: MO3 returns float and MO2 takes int, so the inner result
        // is converted float -> int at this call.
        if (oddM)
            return MO2( B[midB], MO3(A[0], B[midB - 1], B[midB + 1]) );

        // Case 3: M is even. The median is one of B's two middle
        // elements or A[0].
        return MO3( B[midB], B[midB - 1], A[0] );
    }

    // Base cases: the smaller array holds exactly two elements.
    if (N == 2)
    {
        // Case 4: two elements on each side.
        if (M == 2)
            return MO4(A[0], A[1], B[0], B[1]);

        // Case 5: M is odd. Candidates are B's middle element, the larger
        // of A[0] and the element before the middle, and the smaller of
        // A[1] and the element after the middle.
        if (oddM)
        {
            const int lowCandidate  = std::max(A[0], B[midB - 1]);
            const int highCandidate = std::min(A[1], B[midB + 1]);
            return MO3( B[midB], lowCandidate, highCandidate );
        }

        // Case 6: M is even. Candidates are B's two middle elements, the
        // larger of A[0] and the element before the first middle, and the
        // smaller of A[1] and the element after the second middle.
        const int lowCandidate  = std::max(A[0], B[midB - 2]);
        const int highCandidate = std::min(A[1], B[midB + 1]);
        return MO4( B[midB], B[midB - 1], lowCandidate, highCandidate );
    }

    // General case: compare the two middle elements and discard the same
    // number of elements (cut) from each array, on opposite ends.
    const int cut    = ( N - 1 ) / 2;
    const int pivotB = ( M - 1 ) / 2;
    const int nextN  = N / 2 + 1;
    const int nextM  = M - cut;

    // A[cut] <= B[pivotB]: drop the first `cut` elements of A and the last
    // `cut` elements of B. Otherwise drop the tail of A and the first
    // `cut` elements of B.
    return ( A[cut] <= B[pivotB] )
               ? findMedianUtil( A + cut, nextN, B, nextM )
               : findMedianUtil( A, nextN, B + cut, nextM );
}

// Entry point: returns the median of the two arrays A (N elements) and
// B (M elements). findMedianUtil() expects the smaller array first, so
// the arguments are swapped when A is the longer one.
float findMedian( int A[], int N, int B[], int M )
{
    return ( N > M ) ? findMedianUtil( B, M, A, N )
                     : findMedianUtil( A, N, B, M );
}

// Driver: prints the median of two sample arrays.
int main()
{
    int A[] = {900};
    int B[] = {5, 8, 10, 20};

    // Element counts derived from the array sizes.
    const int N = sizeof(A) / sizeof(*A);
    const int M = sizeof(B) / sizeof(*B);

    const float median = findMedian( A, N, B, M );
    printf("%f", median);
    return 0;
}

Answer 1

这两个表达式是等价的，因为 B[M/2] 恰好是 B 的中位数（即利用 B[M/2-1] <= B[M/2] <= B[M/2+1]）：

using a:=A[0], b:=B[M/2], bm:=B[M/2-1], bp:=B[M/2+1]
MO4(a, b, bm, bp)
= 0.5 * (a + b + bm + bp - min(a, b, bm, bp) - max(a, b, bm, bp))
= 0.5 * (b + (a + bm + bp - min(a, bm, bp) - max(a, bm, bp)))
= MO2(b, MO3(a, bm, bp))

这遵循分治（divide and conquer）范式：把问题划分为子问题。在这里，两个数组各去掉相同数量的元素，从而使中位数保持不变。不能简单地把两个数组都减半，因为它们的大小不同（那样中位数就不再位于范围的中间，算法会返回错误的值）。相关问题（in this related question）中对此有更详尽的解答。
不失一般性，设 N <= M：
```
T(N, M) :=
| (N < 2 && M < 2)  O(1)
| (N < 2 && M >= 2) T(M) = O(log M)
| (N == 2)          T(M) = O(log M)
| (otherwise)       1 + T(N - N/2, M - N/2)
```
此处 T(N, M) 是 findMedianUtil 的运行时间，T(M) 是在单个数组中查找中位数的运行时间。在一般情况下，最多递归 log N 次就会到达某个基本情况，而基本情况至多还需 O(log M)，因此总计为 O(log N + log M)。

顺便一提：O(log M + log N) 只是 O(log max(N, M)) 的一种偷懒写法。对于 N <= M，它等于 O(log M)，因为此时 O(log N) 包含于 O(log M)。

找到两个不同大小的排序数组的中位数

1 个答案: