我试图了解如何修改二进制搜索,以便它适用于第一次和最后一次,当然我可以在网上找到一些代码,但我试图深入理解,这里是我找到了一些基本的非递归二进制搜索:
int BinarySearch(int *array, int number_of_elements, int key)
{
int low = 0, high = number_of_elements-1, mid;
while(low <= high)
{
mid = (low + high)/2;
if(array[mid] < key)
{
low = mid + 1;
}
else if(array[mid] == key)
{
return mid;
}
else if(array[mid] > key)
{
high = mid-1;
}
}
return -1;
}
我需要做哪些修改以及它们背后的逻辑是什么?
编辑:我希望它高效而不是线性完成。
答案 0 :(得分:3)
对包含一组有序值的数组进行二进制搜索,其中值可能出现多次,但不一定会产生第一个或最后一个元素。
它产生它找到的第一个匹配元素。
由于此元素可能被更多匹配元素包围,因此需要第二步,以便找到第一个和最后一个匹配元素。这可以通过其他帖子建议的线性搜索来完成,也可以在对数时间内完成。
让我成为第一个找到的匹配的索引,如二进制搜索所报告的那样。
然后,“等于序列”的开始在[0..i]中。并且“等于序列”的结尾在[i..N-1]中,其中N是序列的长度。递归地将这些间隔一分为二,直到找到边界,最终产生第一个和最后一个匹配。
以下(f#)程序在几行中显示了这个想法。编写等效的C函数应该是一件小事。
let equal_range (a : int[]) i =
let rec first i0 i1 =
if a.[i0] = a.[i1] || (i1-i0) < 2 then
if a.[i0] <> a.[i1]
then
i1
else
i0
else
let mid = (i1 - i0) / 2 + i0
if a.[mid] = a.[i1] then first i0 mid else first mid i1
let rec last i0 i1 =
if a.[i1] = a.[i0] || i1-i0 < 2 then
if a.[i0] <> a.[i1]
then
i0
else
i1
else
let mid = (i1 - i0) / 2 + i0
if a.[mid] = a.[i0] then last mid i1 else last i0 mid
(first 0 i),(last i (Array.length a - 1))
let test_arrays =
[
Array.ofList ([1..4] @ [5;5;5;5;5] @ [6..10])
[|1|]
[|1;1;1;1;1|]
]
test_arrays
|> List.iter(fun a ->
printfn "%A" a
for i = 0 to Array.length a - 1 do
printfn "%d(a.[%d] = %d): %A" i i (a.[i]) (equal_range a i)
)
这里是等效的非递归C代码:
#include <stdio.h>
#include <assert.h>
#include <stdbool.h>
typedef struct IndexPair_tag
{
size_t a;
size_t b;
} IndexPair_t;
bool equal_range(const int * a, size_t n, size_t i, IndexPair_t * result)
{
if (NULL == a) return false;
if (NULL == result) return false;
if (i >= n) return false;
size_t i0, i1, mid;
i0 = 0;
i1 = i;
while (a[i0] != a[i1] && ((i1 - i0) > 1))
{
mid = (i1 - i0) / 2 + i0;
if (a[mid] == a[i1])
{
i1 = mid;
}
else
{
i0 = mid;
}
}
if (a[i0] != a[i1])
result->a = i1;
else
result->a = i0;
i0 = i;
i1 = n - 1;
while (a[i0] != a[i1] && ((i1 - i0) > 1))
{
mid = (i1 - i0) / 2 + i0;
if (a[mid] == a[i0])
{
i0 = mid;
}
else
{
i1 = mid;
}
}
if (a[i0] != a[i1] )
result->b = i0;
else
result->b = i1;
return true;
}
static void ShowArray(int *a, size_t N)
{
if (N > 0)
{
printf("[%d", a[0]);
for (size_t i = 1; i < N; i++)
{
printf(", %d", a[i]);
}
printf("]\n");
}
else
printf("[]\n");
}
int main()
{
{
const size_t N = 14;
int a[N] = { 1,2,3,4,5,5,5,5,5,6,7,8,9,10 };
ShowArray(a, N);
IndexPair_t result;
for (size_t i = 0; i < N; i++)
{
if (equal_range(a, 14, i, &result))
{
printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
assert(a[result.a] == a[result.b]);
}
else
{
printf("For i = %d, equal_range() returned false.\n", i);
assert(false);
}
}
}
{
const size_t N = 1;
int a[N] = { 1 };
ShowArray(a, N);
IndexPair_t result;
for (size_t i = 0; i < N; i++)
{
if (equal_range(a, N, i, &result))
{
printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
assert(a[result.a] == a[result.b]);
}
else
{
printf("For i = %d, equal_range() returned false.\n", i);
assert(false);
}
}
}
{
const size_t N = 5;
int a[N] = { 1,1,1,1,1 };
ShowArray(a, N);
IndexPair_t result;
for (size_t i = 0; i < N; i++)
{
if (equal_range(a, N, i, &result))
{
printf("%d(a[%d] = %d): (%d,%d)\n", i, i, a[i], result.a, result.b);
assert(a[result.a] == a[result.b]);
}
else
{
printf("For i = %d, equal_range() returned false.\n", i);
assert(false);
}
}
}
return 0;
}
更新:乔纳森是对的,功能的设计很草率,并且有一些角落问题。
事实上,函数采用索引,而不是值是正确的,恕我直言,因为它应该是第二步,在产生所查找元素的索引的第一步之后。
答案 1 :(得分:0)
根据我的理解,如果找到您要查找的元素,一个选项是向左或向右执行线性搜索,以找到最后一次出现的元素。
或者,您可以再次使用二分搜索来调整位置,直到下一个位置的元素发生变化。这可以通过修改上面的中间案例来完成;如果找到该元素,则不要返回其位置,直到该位置保持不变为止。
答案 2 :(得分:0)
您可以运行二进制搜索以查找数组中键的任意任意(如果存在)。
接下来,第一次出现在返回的索引的左侧,因此您相应地设置边界并重复运行二进制搜索,直到索引左侧的元素不等于键。在最后一次出现时类似地进行
binary_search(0, n-1, key)
{
/* run binary search, get index of key */
leftmost=min(leftmost, index)
if (array[index-1]==key and leftmost==index)
binary_search(0, index-1, key)
rightmost=max(rightmost, index)
if (array[index+1]==key and rightmost==index)
binary_search(index+1, n-1, key)
}
答案 3 :(得分:0)
只需对元素(e)执行常规二进制搜索。让我们说它返回索引i。 为了找到第一次出现的索引,只需重复搜索[0&lt; - &gt; i-1]上的e,直到搜索不再找到e为止。 类似地,最后一次出现[i + 1&lt; - &gt; n]。
答案 4 :(得分:0)
以下是二元搜索的4种变体,以及测试代码。在抽象术语中,这些搜索有序数组X [1..N],它可能包含重复数据值,用于特定值T.
BinSearch_A()
- 找到X [P] = T的任意索引P. BinSearch_B()
- 找到X [P] = T的最小指数P. BinSearch_C()
- 找到X [P] = Y的最大指数P. BinSearch_D()
- 找到索引的范围P..Q,其中X [P] = T(但是X [P-1]!= T)并且X [Q] = T(但是X) [Q + 1]!= T)。 BinSearch_D()
是问题所需要的,但理解其他三个将有助于理解最终结果。
该代码基于Programming Pearls, 1st Edn第8章的材料。它显示BinSearch_A()
- 它在前面的章节中开发 - 然后将其修改为BinSearch_B()
,作为优化搜索1000个条目数组中的值的第一步。
提供的代码包含基于1的数组的伪代码,基于0的数组的实际C代码,以及广泛验证数据和结果的测试工具。它是330行,其中30行是空白的,大约有100个是注释,12个是标题,类型和声明,90个是4个实现,100个是测试代码。
请注意,如果您知道数组中的值是唯一的,那么BinSearch_D()
不是正确的算法(使用BinSearch_A()
)。同样,如果选择一组相同元素中的哪个元素无关紧要,BinSearch_D()
不是正确的算法(使用BinSearch_A()
)。两个变体BinSearch_B()
和BinSearch_C()
是专用的,但如果您只需要搜索到的值所在的最小索引或最大索引,则工作量比BinSearch_D()
少。
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
typedef struct Pair
{
int lo;
int hi;
} Pair;
extern Pair BinSearch_D(int N, const int X[N], int T);
extern int BinSearch_A(int N, const int X[N], int T);
extern int BinSearch_B(int N, const int X[N], int T);
extern int BinSearch_C(int N, const int X[N], int T);
/*
** Programming Pearls, 1st Edn, 8.3 Major Surgery - Binary Search
**
** In each fragment, P contains the result (0 indicates 'not found').
**
** Search for T in X[1..N] - BinSearch-A
**
** L := 1; U := N
** loop
** # Invariant: if T is in X, it is in X[L..U]
** if L > U then
** P := 0; break;
** M := (L+U) div 2
** case
** X[M] < T: L := M+1
** X[M] = T: P := M; break
** X[M] > T: U := M-1
**
** Search for first occurrence of T in X[1..N] - BinSearch-B
**
** L := 0; U := N+1
** while L+1 != U do
** # Invariant: X[L] < T and X[U] >= T and L < U
** M := (L+U) div 2
** if X[M] < T then
** L := M
** else
** U := M
** # Assert: L+1 = U and X[L] < T and X[U] >= T
** P := U
** if P > N or X[P] != T then P := 0
**
** Search for last occurrence of T in X[1..N] - BinSearch-C
** Adapted from BinSearch-B (not in Programming Pearls)
**
** L := 0; U := N+1
** while L+1 != U do
** # Invariant: X[L] <= T and X[U] > T and L < U
** M := (L+U) div 2
** if X[M] <= T then
** L := M
** else
** U := M
** # Assert: L+1 = U and X[L] < T and X[U] >= T
** P := L
** if P = 0 or X[P] != T then P := 0
**
** C implementations of these deal with X[0..N-1] instead of X[1..N],
** and return -1 when the value is not found.
*/
int BinSearch_A(int N, const int X[N], int T)
{
int L = 0;
int U = N-1;
while (1)
{
/* Invariant: if T is in X, it is in X[L..U] */
if (L > U)
return -1;
int M = (L + U) / 2;
if (X[M] < T)
L = M + 1;
else if (X[M] > T)
U = M - 1;
else
return M;
}
assert(0);
}
int BinSearch_B(int N, const int X[N], int T)
{
int L = -1;
int U = N;
while (L + 1 != U)
{
/* Invariant: X[L] < T and X[U] >= T and L < U */
int M = (L + U) / 2;
if (X[M] < T)
L = M;
else
U = M;
}
assert(L+1 == U && (L == -1 || X[L] < T) && (U >= N || X[U] >= T));
int P = U;
if (P >= N || X[P] != T)
P = -1;
return P;
}
int BinSearch_C(int N, const int X[N], int T)
{
int L = -1;
int U = N;
while (L + 1 != U)
{
/* Invariant: X[L] <= T and X[U] > T and L < U */
int M = (L + U) / 2;
if (X[M] <= T)
L = M;
else
U = M;
}
assert(L+1 == U && (L == -1 || X[L] <= T) && (U >= N || X[U] > T));
int P = L;
if (P < 0 || X[P] != T)
P = -1;
return P;
}
/*
** If value is found in the array (elements array[0]..array[a_size-1]),
** then the returned Pair identifies the lowest index at which value is
** found (in lo) and the highest value (in hi). The lo and hi values
** will be the same if there's only one entry that matches.
**
** If the value is not found in the array, the pair (-1, -1) is returned.
**
** -- If the values in the array are unique, then this is not the binary
** search to use.
** -- If it doesn't matter which one of multiple identical keys are
** returned, this is not the binary search to use.
**
** ------------------------------------------------------------------------
**
** Another way to look at this is:
** -- Lower bound is largest index lo such that a[lo] < value and a[lo+1] >= value
** -- Upper bound is smallest index hi such that a[hi] > value and a[hi-1] <= value
** -- Range is then lo+1..hi-1.
** -- If the values is not found, the value (-1, -1) is returned.
**
** Let's review:
** == Data: 2, 3, 3, 3, 5, 5, 6, 8 (N = 8)
** Searches and results:
** == 1 .. lo = -1, hi = -1 R = (-1, -1) not found
** == 2 .. lo = -1, hi = 1 R = ( 0, 0) found
** == 3 .. lo = 0, hi = 4 R = ( 1, 3) found
** == 4 .. lo = -1, hi = -1 R = (-1, -1) not found
** == 5 .. lo = 3, hi = 6 R = ( 4, 5) found
** == 6 .. lo = 5, hi = 7 R = ( 6, 6) found
** == 7 .. lo = -1, hi = -1 R = (-1, -1) not found
** == 8 .. lo = 6, hi = 8 R = ( 7, 7) found
** == 9 .. lo = -1, hi = -1 R = (-1, -1) not found
**
** Code created by combining BinSearch_B() and BinSearch_C() into a
** single function. The two separate ranges of values to be searched
** (L_lo:L_hi vs U_lo:U_hi) are almost unavoidable as if there are
** repeats in the data, the values diverge.
*/
Pair BinSearch_D(int N, const int X[N], int T)
{
int L_lo = -1;
int L_hi = N;
int U_lo = -1;
int U_hi = N;
while (L_lo + 1 != L_hi || U_lo + 1 != U_hi)
{
if (L_lo + 1 != L_hi)
{
/* Invariant: X[L_lo] < T and X[L_hi] >= T and L_lo < L_hi */
int L_md = (L_lo + L_hi) / 2;
if (X[L_md] < T)
L_lo = L_md;
else
L_hi = L_md;
}
if (U_lo + 1 != U_hi)
{
/* Invariant: X[U_lo] <= T and X[U_hi] > T and U_lo < U_hi */
int U_md = (U_lo + U_hi) / 2;
if (X[U_md] <= T)
U_lo = U_md;
else
U_hi = U_md;
}
}
assert(L_lo+1 == L_hi && (L_lo == -1 || X[L_lo] < T) && (L_hi >= N || X[L_hi] >= T));
int L = L_hi;
if (L >= N || X[L] != T)
L = -1;
assert(U_lo+1 == U_hi && (U_lo == -1 || X[U_lo] <= T) && (U_hi >= N || X[U_hi] > T));
int U = U_lo;
if (U < 0 || X[U] != T)
U = -1;
return (Pair) { .lo = L, .hi = U };
}
/* Test support code */
static void check_sorted(const char *a_name, int size, const int array[size])
{
int ok = 1;
for (int i = 1; i < size; i++)
{
if (array[i-1] > array[i])
{
fprintf(stderr, "Out of order: %s[%d] = %d, %s[%d] = %d\n",
a_name, i-1, array[i-1], a_name, i, array[i]);
ok = 0;
}
}
if (!ok)
exit(1);
}
static void dump_array(const char *a_name, int size, const int array[size])
{
printf("%s Data: ", a_name);
for (int i = 0; i < size; i++)
printf(" %2d", array[i]);
putchar('\n');
}
/* This interface could be used instead of the Pair returned by BinSearch_D() */
static void linear_search(int size, const int array[size], int value, int *lo, int *hi)
{
*lo = *hi = -1;
for (int i = 0; i < size; i++)
{
if (array[i] == value)
{
if (*lo == -1)
*lo = i;
*hi = i;
}
else if (array[i] > value)
break;
}
}
static void test_abc_search(const char *a_name, int size, const int array[size])
{
dump_array(a_name, size, array);
check_sorted(a_name, size, array);
for (int i = array[0] - 1; i < array[size - 1] + 2; i++)
{
int a_idx = BinSearch_A(size, array, i);
int b_idx = BinSearch_B(size, array, i);
int c_idx = BinSearch_C(size, array, i);
printf("T %2d: A %2d, B %2d, C %2d\n", i, a_idx, b_idx, c_idx);
assert(a_idx != -1 || (b_idx == -1 && c_idx == -1));
assert(b_idx != -1 || (c_idx == -1 && a_idx == -1));
assert(c_idx != -1 || (a_idx == -1 && b_idx == -1));
assert(a_idx == -1 || (array[a_idx] == i && array[b_idx] == i && array[c_idx] == i));
assert(a_idx == -1 || (a_idx >= b_idx && a_idx <= c_idx));
int lo;
int hi;
linear_search(size, array, i, &lo, &hi);
assert(lo == b_idx && hi == c_idx);
}
}
static void test_d_search(const char *a_name, int size, const int array[size], const Pair results[])
{
dump_array(a_name, size, array);
check_sorted(a_name, size, array);
for (int i = array[0] - 1, j = 0; i < array[size - 1] + 2; i++, j++)
{
Pair result = BinSearch_D(size, array, i);
printf("%2d: (%d, %d) vs (%d, %d)\n", i, result.lo, result.hi, results[j].lo, results[j].hi);
int lo;
int hi;
linear_search(size, array, i, &lo, &hi);
assert(lo == result.lo && hi == result.hi);
}
}
int main(void)
{
int A[] = { 1, 2, 2, 4, 5, 5, 5, 5, 5, 6, 8, 8, 9, 10 };
enum { A_SIZE = sizeof(A) / sizeof(A[0]) };
test_abc_search("A", A_SIZE, A);
int B[] = { 10, 12, 12, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 18, 18, 18, 19, 19, 19, 19, };
enum { B_SIZE = sizeof(B) / sizeof(B[0]) };
test_abc_search("B", B_SIZE, B);
int C[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, };
enum { C_SIZE = sizeof(C) / sizeof(C[0]) };
test_abc_search("C", C_SIZE, C);
/* Results for BinSearch_D() on array A */
static const Pair results_A[] =
{
{ -1, -1 }, { 0, 0 }, { 1, 2 },
{ -1, -1 }, { 3, 3 }, { 4, 8 },
{ 9, 9 }, { -1, -1 }, { 10, 11 },
{ 12, 12 }, { 13, 13 }, { -1, -1 },
};
test_d_search("A", A_SIZE, A, results_A);
int D[] = { 2, 3, 3, 3, 5, 5, 6, 8 };
enum { D_SIZE = sizeof(D) / sizeof(D[0]) };
static const Pair results_D[] =
{
{ -1, -1 }, { 0, 0 }, { 1, 3 },
{ -1, -1 }, { 4, 5 }, { 6, 6 },
{ -1, -1 }, { 7, 7 }, { -1, -1 },
};
test_d_search("D", D_SIZE, D, results_D);
return 0;
}
示例输出:
A Data: 1 2 2 4 5 5 5 5 5 6 8 8 9 10
T 0: A -1, B -1, C -1
T 1: A 0, B 0, C 0
T 2: A 2, B 1, C 2
T 3: A -1, B -1, C -1
T 4: A 3, B 3, C 3
T 5: A 6, B 4, C 8
T 6: A 9, B 9, C 9
T 7: A -1, B -1, C -1
T 8: A 10, B 10, C 11
T 9: A 12, B 12, C 12
T 10: A 13, B 13, C 13
T 11: A -1, B -1, C -1
B Data: 10 12 12 12 14 15 15 15 15 15 15 15 16 16 16 18 18 18 19 19 19 19
T 9: A -1, B -1, C -1
T 10: A 0, B 0, C 0
T 11: A -1, B -1, C -1
T 12: A 1, B 1, C 3
T 13: A -1, B -1, C -1
T 14: A 4, B 4, C 4
T 15: A 10, B 5, C 11
T 16: A 13, B 12, C 14
T 17: A -1, B -1, C -1
T 18: A 16, B 15, C 17
T 19: A 19, B 18, C 21
T 20: A -1, B -1, C -1
C Data: 1 2 3 4 5 6 7 8 9 10 11 12
T 0: A -1, B -1, C -1
T 1: A 0, B 0, C 0
T 2: A 1, B 1, C 1
T 3: A 2, B 2, C 2
T 4: A 3, B 3, C 3
T 5: A 4, B 4, C 4
T 6: A 5, B 5, C 5
T 7: A 6, B 6, C 6
T 8: A 7, B 7, C 7
T 9: A 8, B 8, C 8
T 10: A 9, B 9, C 9
T 11: A 10, B 10, C 10
T 12: A 11, B 11, C 11
T 13: A -1, B -1, C -1
A Data: 1 2 2 4 5 5 5 5 5 6 8 8 9 10
0: (-1, -1) vs (-1, -1)
1: (0, 0) vs (0, 0)
2: (1, 2) vs (1, 2)
3: (-1, -1) vs (-1, -1)
4: (3, 3) vs (3, 3)
5: (4, 8) vs (4, 8)
6: (9, 9) vs (9, 9)
7: (-1, -1) vs (-1, -1)
8: (10, 11) vs (10, 11)
9: (12, 12) vs (12, 12)
10: (13, 13) vs (13, 13)
11: (-1, -1) vs (-1, -1)
D Data: 2 3 3 3 5 5 6 8
1: (-1, -1) vs (-1, -1)
2: (0, 0) vs (0, 0)
3: (1, 3) vs (1, 3)
4: (-1, -1) vs (-1, -1)
5: (4, 5) vs (4, 5)
6: (6, 6) vs (6, 6)
7: (-1, -1) vs (-1, -1)
8: (7, 7) vs (7, 7)
9: (-1, -1) vs (-1, -1)