C

时间:2016-02-02 07:51:29

标签: c arrays binary-search

我试图了解如何修改二进制搜索,以便它适用于第一次和最后一次,当然我可以在网上找到一些代码,但我试图深入理解,这里是我找到了一些基本的非递归二进制搜索:

int BinarySearch(int *array, int number_of_elements, int key)
{
    int low = 0, high = number_of_elements-1, mid;
    while(low <= high)
    {
            mid = (low + high)/2;
            if(array[mid] < key)
            {
                    low = mid + 1; 
            }
            else if(array[mid] == key)
            {
                    return mid;
            }
            else if(array[mid] > key)
            {
                    high = mid-1;
            }

    }
    return -1;
}

我需要做哪些修改以及它们背后的逻辑是什么?

  

编辑:我希望它高效而不是线性完成。

5 个答案:

答案 0 :(得分:3)

对包含一组有序值的数组进行二进制搜索,其中值可能出现多次,但不一定会产生第一个或最后一个元素。

它产生它找到的第一个匹配元素。

由于此元素可能被更多匹配元素包围,因此需要第二步,以便找到第一个和最后一个匹配元素。这可以通过其他帖子建议的线性搜索来完成,也可以在对数时间内完成。

让我成为第一个找到的匹配的索引,如二进制搜索所报告的那样。

然后,“等于序列”的开始在[0..i]中。并且“等于序列”的结尾在[i..N-1]中,其中N是序列的长度。递归地将这些间隔一分为二,直到找到边界,最终产生第一个和最后一个匹配。

以下(f#)程序在几行中显示了这个想法。编写等效的C函数应该是一件小事。

let equal_range (a : int[]) i =
    let rec first i0 i1 = 
        if a.[i0] = a.[i1] || (i1-i0) < 2 then
            if a.[i0] <> a.[i1] 
            then
                i1
            else
                i0
        else
            let mid = (i1 - i0) / 2 + i0
            if a.[mid] = a.[i1] then first i0 mid else first mid i1
    let rec last i0 i1 = 
        if a.[i1] = a.[i0] || i1-i0 < 2 then 
            if a.[i0] <> a.[i1] 
            then
                i0
            else
                i1
        else
            let mid = (i1 - i0) / 2 + i0
            if a.[mid] = a.[i0] then last mid i1 else last i0 mid
    (first 0 i),(last i (Array.length a - 1))

let test_arrays = 
    [
        Array.ofList ([1..4] @ [5;5;5;5;5] @ [6..10])
        [|1|]
        [|1;1;1;1;1|]
    ]

test_arrays
|> List.iter(fun a -> 
        printfn "%A" a 
        for i = 0 to Array.length a - 1 do
            printfn "%d(a.[%d] = %d): %A" i i (a.[i]) (equal_range a i)
    )

这里是等效的非递归C代码:

#include <stdio.h>
#include <assert.h>
#include <stdbool.h>

typedef struct IndexPair_tag
{
    size_t a;
    size_t b;
} IndexPair_t;

bool equal_range(const int * a, size_t n, size_t i, IndexPair_t * result)
{
    if (NULL == a) return false;
    if (NULL == result) return false;
    if (i >= n) return false;

    size_t i0, i1, mid;

    i0 = 0;
    i1 = i;
    while (a[i0] != a[i1] && ((i1 - i0) > 1))
    {
        mid = (i1 - i0) / 2 + i0;
        if (a[mid] == a[i1])
        {
            i1 = mid;
        }
        else
        {
            i0 = mid;
        }
    }
    if (a[i0] != a[i1])
        result->a = i1;
    else
        result->a = i0;

    i0 = i;
    i1 = n - 1;
    while (a[i0] != a[i1] && ((i1 - i0) > 1))
    {
        mid = (i1 - i0) / 2 + i0;
        if (a[mid] == a[i0])
        {
            i0 = mid;
        }
        else
        {
            i1 = mid;
        }
    }
    if (a[i0] != a[i1] )
        result->b = i0;
    else
        result->b = i1;

    return true;
}

static void ShowArray(int *a, size_t N)
{
    if (N > 0)
    {
        printf("[%d", a[0]);
        for (size_t i = 1; i < N; i++)
        {
            printf(", %d", a[i]);
        }
        printf("]\n");
    }
    else
        printf("[]\n");

}

int main()
{
    {
        const size_t N = 14;
        int a[N] = { 1,2,3,4,5,5,5,5,5,6,7,8,9,10 };
        ShowArray(a, N);
        IndexPair_t result;
        for (size_t i = 0; i < N; i++)
        {
            if (equal_range(a, 14, i, &result))
            {
                printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
                assert(a[result.a] == a[result.b]);
            }
            else
            {
                printf("For i = %d, equal_range() returned false.\n", i);
                assert(false);
            }
        }
    }
    {
        const size_t N = 1;
        int a[N] = { 1 };
        ShowArray(a, N);
        IndexPair_t result;
        for (size_t i = 0; i < N; i++)
        {
            if (equal_range(a, N, i, &result))
            {
                printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
                assert(a[result.a] == a[result.b]);
            }
            else
            {
                printf("For i = %d, equal_range() returned false.\n", i);
                assert(false);
            }
        }
    }
    {
        const size_t N = 5;
        int a[N] = { 1,1,1,1,1 };
        ShowArray(a, N);
        IndexPair_t result;
        for (size_t i = 0; i < N; i++)
        {
            if (equal_range(a, N, i, &result))
            {
                printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
                assert(a[result.a] == a[result.b]);
            }
            else
            {
                printf("For i = %d, equal_range() returned false.\n", i);
                assert(false);
            }
        }
    }

    return 0;
}

更新:乔纳森是对的,功能的设计很草率,并且有一些角落问题。

  • 修正了函数无法报告参数错误的事实。
  • 为equal_range()添加了防御性参数测试。
  • 修正了这样一个事实:对于边缘情况,产生了错误的结果。
  • 更改了测试驱动程序(主程序),以便涵盖所有边缘情况。

事实上,函数采用索引,而不是值是正确的,恕我直言,因为它应该是第二步,在产生所查找元素的索引的第一步之后。

答案 1 :(得分:0)

根据我的理解,如果找到您要查找的元素,一个选项是向左或向右执行线性搜索,以找到最后一次出现的元素。

或者,您可以再次使用二分搜索来调整位置,直到下一个位置的元素发生变化。这可以通过修改上面的中间案例来完成;如果找到该元素,则不要返回其位置,直到该位置保持不变为止。

答案 2 :(得分:0)

您可以运行二进制搜索以查找数组中键的任意任意(如果存在)。

接下来,第一次出现在返回的索引的左侧,因此您相应地设置边界并重复运行二进制搜索,直到索引左侧的元素不等于键。在最后一次出现时类似地进行

binary_search(0, n-1, key)
{
    /* run binary search, get index of key */
    leftmost=min(leftmost, index)
    if (array[index-1]==key and leftmost==index)
        binary_search(0, index-1, key)
    rightmost=max(rightmost, index)
    if (array[index+1]==key and rightmost==index)
        binary_search(index+1, n-1, key)
}

答案 3 :(得分:0)

只需对元素(e)执行常规二进制搜索。让我们说它返回索引i。 为了找到第一次出现的索引,只需重复搜索[0&lt; - &gt; i-1]上的e,直到搜索不再找到e为止。 类似地,最后一次出现[i + 1&lt; - &gt; n]。

答案 4 :(得分:0)

以下是二元搜索的4种变体,以及测试代码。在抽象术语中,这些搜索有序数组X [1..N],它可能包含重复数据值,用于特定值T.

  • BinSearch_A() - 找到X [P] = T的任意索引P.
  • BinSearch_B() - 找到X [P] = T的最小指数P.
  • BinSearch_C() - 找到X [P] = Y的最大指数P.
  • BinSearch_D() - 找到索引的范围P..Q,其中X [P] = T(但是X [P-1]!= T)并且X [Q] = T(但是X) [Q + 1]!= T)。

BinSearch_D()是问题所需要的,但理解其他三个将有助于理解最终结果。

该代码基于Programming Pearls, 1st Edn第8章的材料。它显示BinSearch_A() - 它在前面的章节中开发 - 然后将其修改为BinSearch_B(),作为优化搜索1000个条目数组中的值的第一步。

提供的代码包含基于1的数组的伪代码,基于0的数组的实际C代码,以及广泛验证数据和结果的测试工具。它是330行,其中30行是空白的,大约有100个是注释,12个是标题,类型和声明,90个是4个实现,100个是测试代码。

请注意,如果您知道数组中的值是唯一的,那么BinSearch_D()不是正确的算法(使用BinSearch_A())。同样,如果选择一组相同元素中的哪个元素无关紧要,BinSearch_D()不是正确的算法(使用BinSearch_A())。两个变体BinSearch_B()BinSearch_C()是专用的,但如果您只需要搜索到的值所在的最小索引或最大索引,则工作量比BinSearch_D()少。

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct Pair
{
    int lo;
    int hi;
} Pair;

extern Pair BinSearch_D(int N, const int X[N], int T);
extern int BinSearch_A(int N, const int X[N], int T);
extern int BinSearch_B(int N, const int X[N], int T);
extern int BinSearch_C(int N, const int X[N], int T);

/*
** Programming Pearls, 1st Edn, 8.3 Major Surgery - Binary Search
**
** In each fragment, P contains the result (0 indicates 'not found').
**
** Search for T in X[1..N] - BinSearch-A
**
**      L := 1; U := N
**      loop
**          # Invariant: if T is in X, it is in X[L..U]
**          if L > U then
**              P := 0; break;
**          M := (L+U) div 2
**          case
**              X[M] < T:  L := M+1
**              X[M] = T:  P := M; break
**              X[M] > T:  U := M-1
**
** Search for first occurrence of T in X[1..N] - BinSearch-B
**
**      L := 0; U := N+1
**      while L+1 != U do
**          # Invariant: X[L] < T and X[U] >= T and L < U
**          M := (L+U) div 2
**          if X[M] < T then
**              L := M
**          else
**              U := M
**      # Assert: L+1 = U and X[L] < T and X[U] >= T
**      P := U
**      if P > N or X[P] != T then P := 0
**
** Search for last occurrence of T in X[1..N] - BinSearch-C
** Adapted from BinSearch-B (not in Programming Pearls)
**
**      L := 0; U := N+1
**      while L+1 != U do
**          # Invariant: X[L] <= T and X[U] > T and L < U
**          M := (L+U) div 2
**          if X[M] <= T then
**              L := M
**          else
**              U := M
**      # Assert: L+1 = U and X[L] < T and X[U] >= T
**      P := L
**      if P = 0 or X[P] != T then P := 0
**
** C implementations of these deal with X[0..N-1] instead of X[1..N],
** and return -1 when the value is not found.
*/

int BinSearch_A(int N, const int X[N], int T)
{
    int L = 0;
    int U = N-1;
    while (1)
    {
        /* Invariant: if T is in X, it is in X[L..U] */
        if (L > U)
            return -1;
        int M = (L + U) / 2;
        if (X[M] < T)
            L = M + 1;
        else if (X[M] > T)
            U = M - 1;
        else
            return M;
    }
    assert(0);
}

int BinSearch_B(int N, const int X[N], int T)
{
    int L = -1;
    int U = N;
    while (L + 1 != U)
    {
        /* Invariant: X[L] < T and X[U] >= T and L < U */
        int M = (L + U) / 2;
        if (X[M] < T)
            L = M;
        else
            U = M;
    }
    assert(L+1 == U && (L == -1 || X[L] < T) && (U >= N || X[U] >= T));
    int P = U;
    if (P >= N || X[P] != T)
        P = -1;
    return P;
}

int BinSearch_C(int N, const int X[N], int T)
{
    int L = -1;
    int U = N;
    while (L + 1 != U)
    {
        /* Invariant: X[L] <= T and X[U] > T and L < U */
        int M = (L + U) / 2;
        if (X[M] <= T)
            L = M;
        else
            U = M;
    }
    assert(L+1 == U && (L == -1 || X[L] <= T) && (U >= N || X[U] > T));
    int P = L;
    if (P < 0 || X[P] != T)
        P = -1;
    return P;
}

/*
** If value is found in the array (elements array[0]..array[a_size-1]),
** then the returned Pair identifies the lowest index at which value is
** found (in lo) and the highest value (in hi).  The lo and hi values
** will be the same if there's only one entry that matches.
**
** If the value is not found in the array, the pair (-1, -1) is returned.
**
** -- If the values in the array are unique, then this is not the binary
**    search to use.
** -- If it doesn't matter which one of multiple identical keys are
**    returned, this is not the binary search to use.
**
** ------------------------------------------------------------------------
**
** Another way to look at this is:
** -- Lower bound is largest  index lo such that a[lo] < value and a[lo+1] >= value
** -- Upper bound is smallest index hi such that a[hi] > value and a[hi-1] <= value
** -- Range is then lo+1..hi-1.
** -- If the values is not found, the value (-1, -1) is returned.
**
** Let's review:
** == Data: 2, 3, 3, 3, 5, 5, 6, 8 (N = 8)
** Searches and results:
** == 1 .. lo = -1, hi = -1  R = (-1, -1) not found
** == 2 .. lo = -1, hi =  1  R = ( 0,  0) found
** == 3 .. lo =  0, hi =  4  R = ( 1,  3) found
** == 4 .. lo = -1, hi = -1  R = (-1, -1) not found
** == 5 .. lo =  3, hi =  6  R = ( 4,  5) found
** == 6 .. lo =  5, hi =  7  R = ( 6,  6) found
** == 7 .. lo = -1, hi = -1  R = (-1, -1) not found
** == 8 .. lo =  6, hi =  8  R = ( 7,  7) found
** == 9 .. lo = -1, hi = -1  R = (-1, -1) not found
**
** Code created by combining BinSearch_B() and BinSearch_C() into a
** single function.  The two separate ranges of values to be searched
** (L_lo:L_hi vs U_lo:U_hi) are almost unavoidable as if there are
** repeats in the data, the values diverge.
*/

Pair BinSearch_D(int N, const int X[N], int T)
{
    int L_lo = -1;
    int L_hi = N;
    int U_lo = -1;
    int U_hi = N;

    while (L_lo + 1 != L_hi || U_lo + 1 != U_hi)
    {
        if (L_lo + 1 != L_hi)
        {
            /* Invariant: X[L_lo] < T and X[L_hi] >= T and L_lo < L_hi */
            int L_md = (L_lo + L_hi) / 2;
            if (X[L_md] < T)
                L_lo = L_md;
            else
                L_hi = L_md;
        }
        if (U_lo + 1 != U_hi)
        {
            /* Invariant: X[U_lo] <= T and X[U_hi] > T and U_lo < U_hi */
            int U_md = (U_lo + U_hi) / 2;
            if (X[U_md] <= T)
                U_lo = U_md;
            else
                U_hi = U_md;
        }
    }

    assert(L_lo+1 == L_hi && (L_lo == -1 || X[L_lo] < T) && (L_hi >= N || X[L_hi] >= T));
    int L = L_hi;
    if (L >= N || X[L] != T)
        L = -1;
    assert(U_lo+1 == U_hi && (U_lo == -1 || X[U_lo] <= T) && (U_hi >= N || X[U_hi] > T));
    int U = U_lo;
    if (U < 0 || X[U] != T)
        U = -1;

    return (Pair) { .lo = L, .hi = U };
}

/* Test support code */

static void check_sorted(const char *a_name, int size, const int array[size])
{
    int ok = 1;
    for (int i = 1; i < size; i++)
    {
        if (array[i-1] > array[i])
        {
            fprintf(stderr, "Out of order: %s[%d] = %d, %s[%d] = %d\n",
                    a_name, i-1, array[i-1], a_name, i, array[i]);
            ok = 0;
        }
    }
    if (!ok)
        exit(1);
}

static void dump_array(const char *a_name, int size, const int array[size])
{
    printf("%s Data: ", a_name);
    for (int i = 0; i < size; i++)
        printf(" %2d", array[i]);
    putchar('\n');
}

/* This interface could be used instead of the Pair returned by BinSearch_D() */
static void linear_search(int size, const int array[size], int value, int *lo, int *hi)
{
    *lo = *hi = -1;
    for (int i = 0; i < size; i++)
    {
        if (array[i] == value)
        {
            if (*lo == -1)
                *lo = i;
            *hi = i;
        }
        else if (array[i] > value)
            break;
    }
}

static void test_abc_search(const char *a_name, int size, const int array[size])
{
    dump_array(a_name, size, array);
    check_sorted(a_name, size, array);

    for (int i = array[0] - 1; i < array[size - 1] + 2; i++)
    {
        int a_idx = BinSearch_A(size, array, i);
        int b_idx = BinSearch_B(size, array, i);
        int c_idx = BinSearch_C(size, array, i);
        printf("T %2d:  A %2d,  B %2d,  C %2d\n", i, a_idx, b_idx, c_idx);
        assert(a_idx != -1 || (b_idx == -1 && c_idx == -1));
        assert(b_idx != -1 || (c_idx == -1 && a_idx == -1));
        assert(c_idx != -1 || (a_idx == -1 && b_idx == -1));
        assert(a_idx == -1 || (array[a_idx] == i && array[b_idx] == i && array[c_idx] == i));
        assert(a_idx == -1 || (a_idx >= b_idx && a_idx <= c_idx));
        int lo;
        int hi;
        linear_search(size, array, i, &lo, &hi);
        assert(lo == b_idx && hi == c_idx);
    }
}

static void test_d_search(const char *a_name, int size, const int array[size], const Pair results[])
{
    dump_array(a_name, size, array);
    check_sorted(a_name, size, array);

    for (int i = array[0] - 1, j = 0; i < array[size - 1] + 2; i++, j++)
    {
        Pair result = BinSearch_D(size, array, i);
        printf("%2d: (%d, %d) vs (%d, %d)\n", i, result.lo, result.hi, results[j].lo, results[j].hi);
        int lo;
        int hi;
        linear_search(size, array, i, &lo, &hi);
        assert(lo == result.lo && hi == result.hi);
    }
}

int main(void)
{
    int A[] = { 1, 2, 2, 4, 5, 5, 5, 5, 5, 6, 8, 8, 9, 10 };
    enum { A_SIZE = sizeof(A) / sizeof(A[0]) };

    test_abc_search("A", A_SIZE, A);

    int B[] = { 10, 12, 12, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 18, 18, 18, 19, 19, 19, 19, };
    enum { B_SIZE = sizeof(B) / sizeof(B[0]) };

    test_abc_search("B", B_SIZE, B);

    int C[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, };
    enum { C_SIZE = sizeof(C) / sizeof(C[0]) };

    test_abc_search("C", C_SIZE, C);

    /* Results for BinSearch_D() on array A */
    static const Pair results_A[] =
    {
        { -1, -1 }, {  0,  0 }, {  1,  2 },
        { -1, -1 }, {  3,  3 }, {  4,  8 },
        {  9,  9 }, { -1, -1 }, { 10, 11 },
        { 12, 12 }, { 13, 13 }, { -1, -1 },
    };

    test_d_search("A", A_SIZE, A, results_A);

    int D[] = { 2, 3, 3, 3, 5, 5, 6, 8 };
    enum { D_SIZE = sizeof(D) / sizeof(D[0]) };
    static const Pair results_D[] =
    {
        { -1, -1 }, {  0,  0 }, {  1,  3 },
        { -1, -1 }, {  4,  5 }, {  6,  6 },
        { -1, -1 }, {  7,  7 }, { -1, -1 },
    };

    test_d_search("D", D_SIZE, D, results_D);

    return 0;
}

示例输出:

A Data:   1  2  2  4  5  5  5  5  5  6  8  8  9 10
T  0:  A -1,  B -1,  C -1
T  1:  A  0,  B  0,  C  0
T  2:  A  2,  B  1,  C  2
T  3:  A -1,  B -1,  C -1
T  4:  A  3,  B  3,  C  3
T  5:  A  6,  B  4,  C  8
T  6:  A  9,  B  9,  C  9
T  7:  A -1,  B -1,  C -1
T  8:  A 10,  B 10,  C 11
T  9:  A 12,  B 12,  C 12
T 10:  A 13,  B 13,  C 13
T 11:  A -1,  B -1,  C -1
B Data:  10 12 12 12 14 15 15 15 15 15 15 15 16 16 16 18 18 18 19 19 19 19
T  9:  A -1,  B -1,  C -1
T 10:  A  0,  B  0,  C  0
T 11:  A -1,  B -1,  C -1
T 12:  A  1,  B  1,  C  3
T 13:  A -1,  B -1,  C -1
T 14:  A  4,  B  4,  C  4
T 15:  A 10,  B  5,  C 11
T 16:  A 13,  B 12,  C 14
T 17:  A -1,  B -1,  C -1
T 18:  A 16,  B 15,  C 17
T 19:  A 19,  B 18,  C 21
T 20:  A -1,  B -1,  C -1
C Data:   1  2  3  4  5  6  7  8  9 10 11 12
T  0:  A -1,  B -1,  C -1
T  1:  A  0,  B  0,  C  0
T  2:  A  1,  B  1,  C  1
T  3:  A  2,  B  2,  C  2
T  4:  A  3,  B  3,  C  3
T  5:  A  4,  B  4,  C  4
T  6:  A  5,  B  5,  C  5
T  7:  A  6,  B  6,  C  6
T  8:  A  7,  B  7,  C  7
T  9:  A  8,  B  8,  C  8
T 10:  A  9,  B  9,  C  9
T 11:  A 10,  B 10,  C 10
T 12:  A 11,  B 11,  C 11
T 13:  A -1,  B -1,  C -1
A Data:   1  2  2  4  5  5  5  5  5  6  8  8  9 10
 0: (-1, -1) vs (-1, -1)
 1: (0, 0) vs (0, 0)
 2: (1, 2) vs (1, 2)
 3: (-1, -1) vs (-1, -1)
 4: (3, 3) vs (3, 3)
 5: (4, 8) vs (4, 8)
 6: (9, 9) vs (9, 9)
 7: (-1, -1) vs (-1, -1)
 8: (10, 11) vs (10, 11)
 9: (12, 12) vs (12, 12)
10: (13, 13) vs (13, 13)
11: (-1, -1) vs (-1, -1)
D Data:   2  3  3  3  5  5  6  8
 1: (-1, -1) vs (-1, -1)
 2: (0, 0) vs (0, 0)
 3: (1, 3) vs (1, 3)
 4: (-1, -1) vs (-1, -1)
 5: (4, 5) vs (4, 5)
 6: (6, 6) vs (6, 6)
 7: (-1, -1) vs (-1, -1)
 8: (7, 7) vs (7, 7)
 9: (-1, -1) vs (-1, -1)