我写了一个Hwang-Lin合并算法(A simple algorithm for merging two disjoint linearly ordered sets)的实现。虽然它有效,但结果却很慢。这是我的实施:
template<typename Iter>
void hwang_lin_buf(Iter first1, Iter first2, Iter last, Iter buf_start) {
ptrdiff_t m = first2 - first1, n = last - first2, buff_size = m;
swap_blocks(first1, buf_start, m);
block_rotate(first1, first2, last);// B first1 - first1 + n, A buf_start - buf_start + m
Iter a = buf_start, b = first1, buffer = first1 + n, buffer_end = buffer + buff_size;
while (m > 0 && n > 0) {
if (n > m) {
size_t alpha = (size_t) floor(log2((double) n / m)), x = n - pow(2, alpha) + 1;
if (*(a + m - 1) < *(b + x - 1)) {
block_rotate(b + x - 1, buffer, buffer_end); //pull out the set of all elements in B >= bx
buffer = b + x - 1; //new buffer position
buffer_end = buffer + buff_size;
n = x - 1;
} else {
Iter place = binary_search(b + x - 1, b + n - 1, a + m - 1);
if (place != b + n) {
block_rotate(place, buffer, buffer_end); //pull out the set of all elements in B >= am
buffer = place;
buffer_end = buffer + buff_size;
}
std::swap(*(a + m - 1), *(buffer_end - 1)); //insert am
--buff_size;
buffer_end = buffer + buff_size;
--m;
n = place - b;
}
} else {
size_t alpha = (size_t) floor(log2((double) m / n)), x = m - std::pow(2, alpha) + 1;
if (*(b + n - 1) < *(a + x - 1)) {
swap_blocks(a + x - 1, buffer_end - m + x - 1,
m - (x - 1)); //pull out the set of all elements in A >= ax
buff_size -= m - (x - 1);
buffer_end = buffer + buff_size;
m = x - 1;
} else {
Iter place = binary_search(a + x - 1, a + m - 1, b + n - 1);
ptrdiff_t c_size = a + m - place;
swap_blocks(place, buffer_end - c_size, c_size); //pull out the set of all elements in A >= bn
buff_size -= c_size;
buffer_end = buffer + buff_size;
block_rotate(b + n - 1, buffer, buffer_end); //insert bn
--buffer;
buffer_end = buffer + buff_size;
--n;
m -= c_size;
}
}
}
if (m > 0)
swap_blocks(a, buffer, m);
}
以下是其中使用的函数:
template<typename Iter>
inline void swap_blocks(Iter pos1, Iter pos2, size_t len) {
for (size_t i = 0; i < len; ++i)
std::swap(*(pos1 + i), *(pos2 + i));
}
template<typename Iter>
inline Iter binary_search(Iter be, Iter en, Iter key) {//returns first iter bigger than key if nothing found
while (be < en) {
Iter mid = be + (en - be) / 2;
if (*mid < *key)
be = mid + 1;
else
en = mid;
}
if (((en == be) && (*be == *key)) || (*be > *key))
return be;
else
return be + 1;
}
template<typename Iter>
inline void floating_hole(Iter pos1, Iter pos2, size_t len) {
auto t = *(pos1 - 1);
for (size_t i = 0; i < len; ++i) {
*(pos1 + i - 1) = *(pos2 + i);
*(pos2 + i) = *(pos1 + i);
}
*(pos1 + len - 1) = t;
}
template<typename Iter>
inline void block_rotate(Iter first1, Iter first2, Iter last) {
ptrdiff_t i = first2 - first1, j = last - first2;
if (i == 0 || j == 0)
return;
Iter p = first1 + i;
while (i != j) {
if (i > j) {
swap_blocks(p - i, p, j);
i -= j;
} else {
swap_blocks(p - i, p + j - i, i);
j -= i;
}
}
swap_blocks(p - i, p, i);
}
我运行测试以将我的实现与std::inplace_merge
进行比较。我希望我的代码有点慢,但我得到了以下结果:将100000(50000的两部分)int64_t
的向量与-O3
优化合并:
Hwang-Lin:5.72054 s
std :: inplace_merge:0.0010003 s
std::inplace_merge
快5000倍!我错过了什么吗?