直插和二进制插入排序

时间:2013-03-03 11:09:26

标签: c++ sorting data-structures binary-search

您好我被要求通过使用二分搜索而不是线性来改进插入排序。问题是由于数据比较,现在最好的情况是O(n log n)而不是O(n),这使得程序在某些情况下不会更慢。这是我的二进制插入排序代码:

void InsertionSort(int data[], int size)
{
    int index = -1;
    for(int i = 1,temp,j;i<size;i++)
    {
        temp = data[i];//DM O(N)
        int high = i,low = 0,mid;//DM O(N)
        while(low <= high)//DC O(nlogn)
        {
            mid = (low + high) /2;
            if(temp < data[mid])
            {
                high = mid - 1;
                index = mid;
            }

            else if (temp > data[mid])
            {
                low = mid + 1;      
            }

            else if(data[mid] == temp)
            {
                index = mid;
                break; 
            } 

        }
        for(j = i;j > index;j--)
        {
            data[j] = data[j-1];//DC Worst Case O(n*n) but the exact is summation of n(n+1) / 2 nad best case o(1)
        }
        data[j] = temp;//DM O(n)
    }   
}

3 个答案:

答案 0 :(得分:0)

您可以使用有利于最佳情况的偏向阶段开始二元搜索。不要直接转到(low+high)/2,而是从位置i-1开始,然后是i-2,然后是i-4i-8i-16,{{1} } ...直到找到较小的元素,或直到i-32低于i-whatever。然后继续普通二进制搜索。

请注意,此优化需要付出代价。最好的情况---排序或几乎排序的数据---需要O(N)时间,但相对于简单的二进制搜索版本,平均情况和最差情况会慢一些。

low

另请注意,void InsertionSort (int data[], int size) { int i, j, high, low, mid, hop; int temp; for (i=1; i<size; i++) { temp = data[i]; high = i; low = 0; hop = 1; do { mid = high - hop; hop <<= 1; if (temp < data[mid]) high = mid; else low = mid + 1; } while (low+hop <= high); while (low != high) { mid = (low + high) / 2; if (temp < data[mid]) high = mid; else low = mid + 1; } for(j=i; j>low; j--) data[j] = data[j-1]; data[j] = temp; } } 已分配high而非mid。完全按照mid+1的情况处理temp==data[mid]的情况。这是为了保持插入排序的良好属性:它是stable sort。但是,在排序普通整数时没有区别。

答案 1 :(得分:0)

这是我的二进制插入排序版本与普通插入排序的比较。我的基准时间是决定性的。二进制版本要快得多。

#include <iostream>

#define NB_VALUE 1000
#define VALUE_RANGE 200

class Profiler
{
    
    std::chrono::time_point<std::chrono::high_resolution_clock> initTime;
    const char *m_name;
    public:
    Profiler(const char *name = nullptr): initTime(std::chrono::high_resolution_clock::now()), m_name(name){}
    ~Profiler()
    {
        if(m_name)
             std::cout << m_name << " ";
        auto begin = std::chrono::time_point_cast<std::chrono::microseconds>(initTime).time_since_epoch();
        auto now = std::chrono::time_point_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch();
        
        auto duration = now - begin;
        double millisecond = duration.count() * 0.001;
        std::cout << millisecond << " ms\n";
    }
};

template <class T>
void swap(T &t1, T &t2)
{
    T tmp = t1;
    t1 = t2;
    t2 = tmp;
}

template<class T>
int binaryFindIndexOfSmallestBiggerOrEqual(T array[], unsigned int begin, unsigned int end, T value)
{
      int delta = end - begin;
      if(delta < 0)
           return -1;
    
    if(delta == 0)
    {
          if(array[end] > value)
                return end;
             else
                return -1;
    }
    
    int midIndex = begin + delta / 2;
    
    if(  array[midIndex] < value )
    {
           return binaryFindIndexOfSmallestBiggerOrEqual(array, midIndex+1, end, value);
    }
    else
    {
        if(midIndex == 0 || array[midIndex -1] <= value)
            return midIndex;
        else
            return binaryFindIndexOfSmallestBiggerOrEqual(array, begin, midIndex - 1, value);
    }   
}

template <class T>
void binaryInsertionSort(T array[], unsigned int begin, unsigned int end)
{
    if(end <= begin)
        return;
        
    int sortedUpTo = begin;
    int insertionIndex;
    while(sortedUpTo < end)
    {
        if(array[sortedUpTo] < array[sortedUpTo + 1])
            insertionIndex = -1; 
        else
             insertionIndex = binaryFindIndexOfSmallestBiggerOrEqual(array, begin, sortedUpTo, array[sortedUpTo + 1]);

        if(insertionIndex != -1)
        {
            T tmp = array[sortedUpTo + 1];
            memmove(array + (insertionIndex + 1), array + (insertionIndex), sizeof(T)*(sortedUpTo - insertionIndex + 1));
            array[insertionIndex] = tmp;
        }
        ++sortedUpTo;
    }
}   

template <class T>
void insertionSort(T array[], unsigned int begin, unsigned int end)
{
    for (int i = begin + 1; i <= end; ++i)
    {
        for (int j = i; j > begin + 1; --j)
        {
            if (array[j - 1] > array[j])
                swap(array[j - 1], array[j]);
            else
                break;
        }
    }
}

int main(int argc, char **argv)
{
     int arrayToSort[NB_VALUE];
    
//******** RANDOM CASE *********
    for (int i = 0; i < NB_VALUE; ++i)
      arrayToSort[i] = rand() % (VALUE_RANGE + 1) * 2 - VALUE_RANGE;
    
    Profiler *p = new Profiler();
    binaryInsertionSort(arrayToSort, 0, NB_VALUE - 1);
    delete p;
    //Timer : 0.119ms
    
    for (int i = 0; i < NB_VALUE; ++i)
    arrayToSort[i] = rand() % (VALUE_RANGE + 1) * 2 - VALUE_RANGE;
    
    p = new Profiler();
    insertionSort(arrayToSort, 0, NB_VALUE - 1);
    delete p;
    //Timer : 1 989ms
    
//********* ALREADY SORTED CASE ************
   for (int i = 0; i < NB_VALUE; ++i)
    arrayToSort[i] = i;
    
    p = new Profiler();
    binaryInsertionSort(arrayToSort, 0, NB_VALUE - 1);
    delete p;
    //Timer : 0.003ms
  
    for (int i = 0; i < NB_VALUE; ++i)
    arrayToSort[i] = i;
    
    p = new Profiler();
    insertionSort(arrayToSort, 0, NB_VALUE - 1);
    delete p;
    //Timer : 0.004ms
    
//********* REVERSED ORDER CASE ************
   for (int i = 0; i < NB_VALUE; ++i)
    arrayToSort[i] = NB_VALUE - 1;
    
    p = new Profiler();
    binaryInsertionSort(arrayToSort, 0, NB_VALUE - 1);
    delete p;
    //Timer : 0.046ms
  
    for (int i = 0; i < NB_VALUE; ++i)
    arrayToSort[i] = NB_VALUE - i;
    
    p = new Profiler();
    insertionSort(arrayToSort, 0, NB_VALUE - 1);
    delete p;
    //Timer : 3 878ms
    
    return 1;
}

答案 2 :(得分:-1)

您还可以使用简单else if(data[mid] == temp)替换最后一个:else 因为如果前两者不正确,那显然是真的......