如何对__uint128_t(128位)进行正向和反向位扫描?

时间:2018-10-05 03:09:22

标签: c++ bit-manipulation 128-bit int128

我已经使用DeBruijn算法实现了64位的正向/反向位扫描操作,但是无法在128位的__uint128_t上实现同样的功能。有什么解决办法吗?预先感谢!

供参考,以下是使用DeBruijn算法实现的64位正向/反向位扫描代码:

// 64-bit forward bit scan implemented with a De Bruijn multiplication and a
// 64-entry lookup table.
// NOTE(review): this is written as a specialization
// (`bitScanForward<std::uint64_t>`), but neither the primary template nor a
// `template <>` header is visible in this snippet — confirm both exist.
constexpr std::uint32_t
bitScanForward<std::uint64_t>(std::uint64_t n) noexcept {
    // Maps the 6-bit index produced by the multiply-and-shift below to a bit position.
    constexpr std::uint32_t seq[] = {
        0,  47, 1,  56, 48, 27, 2,  60, 57, 49, 41, 37, 28, 16, 3,  61,
        54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4,  62,
        46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
        25, 39, 14, 33, 19, 30, 9,  24, 13, 18, 8,  12, 7,  6,  5,  63};
    // n ^ (n - 1) sets every bit up to and including the lowest set bit of n,
    // so the result depends only on that bit. The top 6 bits of the 64-bit
    // product select the table entry. For n == 0 the mask is all ones.
    return seq[((n ^ (n - 1)) * 0x03f79d71b4cb0a89ULL) >> 58];
}

// 64-bit reverse bit scan implemented with a De Bruijn multiplication and a
// 64-entry lookup table.
// NOTE(review): this is written as a specialization
// (`bitScanReverse<std::uint64_t>`), but neither the primary template nor a
// `template <>` header is visible in this snippet — confirm both exist.
constexpr std::uint32_t
bitScanReverse<std::uint64_t>(std::uint64_t n) noexcept {
    // Propagate the highest set bit into every lower position, so that n
    // becomes a mask of all bits up to and including the highest set bit.
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;
    n |= n >> 32;
    // Maps the 6-bit index produced by the multiply-and-shift below to a bit position.
    constexpr std::uint32_t seq[] = {
        0,  47, 1,  56, 48, 27, 2,  60, 57, 49, 41, 37, 28, 16, 3,  61,
        54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4,  62,
        46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
        25, 39, 14, 33, 19, 30, 9,  24, 13, 18, 8,  12, 7,  6,  5,  63};
    // The top 6 bits of the 64-bit product select the table entry.
    // For n == 0 the product is 0, so seq[0] is returned.
    return seq[(n * 0x03f79d71b4cb0a89ULL) >> 58];
}

1 个答案:

答案 0 :(得分:3)

可以将64位的BitScanReverse直接改写为128位版本,但这样做效率不会很高,因为128位乘法和算术运算相对昂贵,正如@Marc Glisse在评论中指出的那样。

不过,您可以将64位的BitScanReverse / BitScanForward作为构建块,实现可移植的128位bsf / bsr:

#include<inttypes.h>
#include<stdint.h>
#include<stdio.h>

/* Forward declarations of the 64-bit scan routines defined later in this file;
 * the 128-bit wrappers below call them on one 64-bit half of their argument. */
int bitScanReverse(uint64_t n);
int bitScanForward(uint64_t n);

/* 128-bit reverse bit scan built on the 64-bit bitScanReverse.
 *
 * The scan runs on the upper 64-bit half when that half is non-zero and on
 * the lower half otherwise; 64 is added to the result in the former case.
 * For u == 0 the lower half (0) is passed to bitScanReverse and its result
 * is returned unchanged.
 */
int bsr_u128 (__uint128_t u) {
  const uint64_t upper = u >> 64;
  const uint64_t lower = u;            /* implicit truncation to the low 64 bits */
  const int base = upper ? 64 : 0;     /* bit offset of the half that gets scanned */
  return base + bitScanReverse(upper ? upper : lower);
}

/* 128-bit forward bit scan built on the 64-bit bitScanForward.
 *
 * The scan runs on the lower 64-bit half when that half is non-zero and on
 * the upper half otherwise; 64 is added to the result in the latter case.
 * For u == 0 the upper half (0) is passed to bitScanForward and 64 is added
 * to whatever it returns.
 */
int bsf_u128 (__uint128_t u) {
  const uint64_t upper = u >> 64;
  const uint64_t lower = u;            /* implicit truncation to the low 64 bits */
  const int base = lower ? 0 : 64;     /* bit offset of the half that gets scanned */
  return base + bitScanForward(lower ? lower : upper);
}


/* 64-bit reverse bit scan: position (0..63) of the most significant set bit,
 * found with a De Bruijn multiplication and a 64-entry lookup table.
 *
 * For n == 0 the product below is 0, so the function returns table entry 0,
 * which is 0 -- the same value it returns for n == 1.
 */
int bitScanReverse(uint64_t n){
    /* Bit position for each 6-bit index extracted from the product. */
    static const int position[64] = {
        0,  47, 1,  56, 48, 27, 2,  60, 57, 49, 41, 37, 28, 16, 3,  61,
        54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4,  62,
        46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
        25, 39, 14, 33, 19, 30, 9,  24, 13, 18, 8,  12, 7,  6,  5,  63};
    const uint64_t debruijn = 0x03f79d71b4cb0a89ULL;

    /* Smear the highest set bit into all lower positions using shifts of
     * 1, 2, 4, 8, 16 and 32, leaving a mask of every bit up to that one. */
    for (int shift = 1; shift <= 32; shift *= 2) {
        n |= n >> shift;
    }

    /* The top 6 bits of the 64-bit product form the table index. */
    const uint64_t slot = (n * debruijn) >> 58;
    return position[slot];
}


/* 64-bit forward bit scan: position (0..63) of the least significant set bit,
 * found with a De Bruijn multiplication and a 64-entry lookup table.
 *
 * For n == 0 the mask computed below is all ones and the function returns 63,
 * the same value it returns for an input whose only set bit is bit 63.
 */
int bitScanForward(uint64_t n){
    /* Bit position for each 6-bit index extracted from the product. */
    static const int position[64] = {
        0,  47, 1,  56, 48, 27, 2,  60, 57, 49, 41, 37, 28, 16, 3,  61,
        54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4,  62,
        46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
        25, 39, 14, 33, 19, 30, 9,  24, 13, 18, 8,  12, 7,  6,  5,  63};
    const uint64_t debruijn = 0x03f79d71b4cb0a89ULL;

    /* Every bit up to and including the lowest set bit of n. */
    const uint64_t low_mask = n ^ (n - 1);

    /* The top 6 bits of the 64-bit product form the table index. */
    const uint64_t slot = (low_mask * debruijn) >> 58;
    return position[slot];
}


/* Test driver.
 *
 * First table: walks a single set bit through a 128-bit value, printing the
 * iteration number, both 64-bit halves in hex, and the results of bsr_u128 /
 * bsf_u128. The loop runs 129 times, so the last iteration sees the value
 * after the bit has been doubled out of the 128-bit range, i.e. zero.
 *
 * Second table: the same walk for the 64-bit routines (65 iterations, the
 * last one with a zero low half).
 *
 * The 64-bit values are printed with PRIX64 rather than "%lX", so the format
 * matches uint64_t on targets where that type is not `unsigned long`.
 */
int main(){
    __uint128_t t = 1;

    for (int i = 0; i < 129; i++) {
        const uint64_t lo = t;          /* low 64 bits (implicit truncation) */
        const uint64_t hi = t >> 64;    /* high 64 bits */
        printf("%3i  %016" PRIX64 " %016" PRIX64 "  bsr = %3i   bsf = %3i\n",
               i, hi, lo, bsr_u128(t), bsf_u128(t));
        t = t + t;                      /* move the single set bit up by one */
    }

    t = 1;
    printf("\nThe zero input case is similar in the 64 bit and the 128 bit case:\n\n");
    for (int i = 0; i < 65; i++) {
        /* The 64-bit routines only ever see the low half of t; pass it
         * explicitly instead of relying on truncation at the call site. */
        const uint64_t lo = t;
        printf("%3i  %016" PRIX64 "  bsr = %3i   bsf = %3i\n",
               i, lo, bitScanReverse(lo), bitScanForward(lo));
        t = t + t;
    }

    return 0;
}

在x86上,这会生成相当高效的代码,例如(gcc -O3 -m64 -march=nehalem):

bsf_u128:
        xor     eax, eax
        test    rdi, rdi
        cmove   rdi, rsi
        sete    al
        sal     eax, 6
        lea     rsi, [rdi-1]
        xor     rsi, rdi
        movabs  rdi, 285870213051386505
        imul    rsi, rdi
        shr     rsi, 58
        add     eax, DWORD PTR seq.31934[0+rsi*4]
        ret

为了测试代码,我们依次在各个位置只设置一个比特位。输出为:

$ ./a.exe
  0  0000000000000000 0000000000000001  bsr =   0   bsf =   0
  1  0000000000000000 0000000000000002  bsr =   1   bsf =   1
  2  0000000000000000 0000000000000004  bsr =   2   bsf =   2
....
 62  0000000000000000 4000000000000000  bsr =  62   bsf =  62
 63  0000000000000000 8000000000000000  bsr =  63   bsf =  63
 64  0000000000000001 0000000000000000  bsr =  64   bsf =  64
 65  0000000000000002 0000000000000000  bsr =  65   bsf =  65
....
126  4000000000000000 0000000000000000  bsr = 126   bsf = 126
127  8000000000000000 0000000000000000  bsr = 127   bsf = 127
128  0000000000000000 0000000000000000  bsr =   0   bsf = 127

The zero input case is similar in the 64 bit and the 128 bit case:

  0  0000000000000001  bsr =   0   bsf =   0
  1  0000000000000002  bsr =   1   bsf =   1
  2  0000000000000004  bsr =   2   bsf =   2
....
 62  4000000000000000  bsr =  62   bsf =  62
 63  8000000000000000  bsr =  63   bsf =  63
 64  0000000000000000  bsr =   0   bsf =  63

实现高效的128位bsf / bsr的另一种方案,是借鉴以下问题中讨论的思路: Counting the number of leading zeros in a 128-bit integer