Question

我有byte我用来存储位标志。我需要计算字节中最重要的设置位的位置。

示例字节： 00 1 01101 =＆gt; 6是最重要的设置位

的位置

紧凑型十六进制映射：

[0x00]      => 0x00
[0x01]      => 0x01
[0x02,0x03] => 0x02
[0x04,0x07] => 0x03
[0x08,0x0F] => 0x04
[0x10,0x1F] => 0x05
[0x20,0x3F] => 0x06
[0x40,0x7F] => 0x07
[0x80,0xFF] => 0x08

C中的TestCase：

#include <stdio.h>

unsigned char check(unsigned char b) {
  unsigned char c = 0x08;
  unsigned char m = 0x80;
  do {
    if(m&b) { return  c; }
    else    { c -= 0x01; }
  } while(m>>=1);
  return 0; //never reached
}
int main() {
  unsigned char input[256] = {
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
    0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
    0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
    0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
    0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
    0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
    0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff };

  unsigned char truth[256] = {
    0x00,0x01,0x02,0x02,0x03,0x03,0x03,0x03,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04, 
    0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, 
    0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06, 
    0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06, 
    0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07, 
    0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07, 
    0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07, 
    0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07, 
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
    0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08};

  int i,r;
  int f = 0;
  for(i=0; i<256; ++i) {
    r=check(input[i]);
    if(r !=(truth[i])) {
      printf("failed %d : 0x%x : %d\n",i,0x000000FF & ((int)input[i]),r);
      f += 1;
    }
  }
  if(!f) { printf("passed all\n");  }
  else   { printf("failed %d\n",f); }
  return 0;
}

我想简化我的check()函数，不涉及循环（或优选分支）。是否有一些麻烦的哈希或哈希查找表解决方案来计算一个字节中最重要的设置位的位置？

Answer 1

您的问题是计算值log2的有效方法。而且因为你似乎想要一个不仅限于C语言的解决方案，我有点懒，并调整了一些C＃代码。

您想要计算log2(x) + 1和x = 0（其中log2未定义），您将结果定义为0（例如，您创建了一个特殊情况log2(0) = -1）。

static readonly Byte[] multiplyDeBruijnBitPosition = new Byte[] {
  7, 2, 3, 4,
  6, 1, 5, 0
};

public static Byte Log2Plus1(Byte value) {
  if (value == 0)
    return 0;

  var roundedValue = value;
  roundedValue |= (Byte) (roundedValue >> 1);
  roundedValue |= (Byte) (roundedValue >> 2);
  roundedValue |= (Byte) (roundedValue >> 4);
  var log2 = multiplyDeBruijnBitPosition[((Byte) (roundedValue*0xE3)) >> 5];
  return (Byte) (log2 + 1);
}

这个杂乱无章的hack取自Find the log base 2 of an N-bit integer in O(lg(N)) operations with multiply and lookup，您可以在其中看到32位值的等效C源代码。此代码已适用于8位值。

但是，您可以使用一个使用非常有效的内置函数为您提供结果的操作（在许多CPU上使用像Bit Scan Reverse这样的单个指令）。问题Bit twiddling: which bit is set?的答案有一些关于此的信息。答案的引用提供了解决此问题的低级别支持的一个可能原因：

这样的事情是许多O（1）算法的核心，例如内核调度程序需要找到由位数组表示的第一个非空队列。

Answer 2

这是一个有趣的小挑战。我不知道这个是否完全可移植，因为我只有VC ++来测试，我当然不能确定它是否比其他方法更有效。这个版本用循环编码，但它可以不需要太多努力就可以展开。

static unsigned char check(unsigned char b)
{
  unsigned char r = 8;
  unsigned char sub = 1;
  unsigned char s = 7;
  for (char i = 0; i < 8; i++)
  {
      sub = sub & ((( b & (1 << s)) >> s--) - 1);
      r -= sub;
  }
  return r;
}

Answer 3

我确信其他所有人早已转移到其他主题但是我的脑海中有一些东西暗示必须有一个更有效的无分支解决方案，而不仅仅是在我的其他方面展开循环发布解决方案快速访问我的Warren副本，让我走上正轨：二进制搜索。

以下是基于该想法的解决方案：

  Pseudo-code:

  // see if there's a bit set in the upper half   
  if ((b >> 4) != 0)  
  {
      offset = 4;
      b >>= 4;   
  }   
  else
      offset = 0;

  // see if there's a bit set in the upper half of what's left   
  if ((b & 0x0C) != 0)   
  {
    offset += 2;
    b >>= 2;   
  }

  // see if there's a bit set in the upper half of what's left   
  if > ((b & 0x02) != 0)   
  {
    offset++;
    b >>= 1;   
  }

  return b + offset;

无分支C ++实现：

static unsigned char check(unsigned char b)
{    
  unsigned char adj = 4 & ((((unsigned char) - (b >> 4) >> 7) ^ 1) - 1);
  unsigned char offset = adj;
  b >>= adj;
  adj = 2 & (((((unsigned char) - (b & 0x0C)) >> 7) ^ 1) - 1);
  offset += adj;
  b >>= adj;
  adj = 1 & (((((unsigned char) - (b & 0x02)) >> 7) ^ 1) - 1);
  return (b >> adj) + offset + adj;
}

是的，我知道这都是学术上的：）

Answer 4

在简单的C中是不可能的。我建议的最好的是检查的以下实现。虽然相当＆＃34;丑陋＆＃34;我认为它比问题中的 ckeck 版本运行得更快。

int check(unsigned char b)
{
    if(b&128) return 8;
    if(b&64)  return 7;
    if(b&32)  return 6;
    if(b&16)  return 5;
    if(b&8)   return 4;
    if(b&4)   return 3;
    if(b&2)   return 2;
    if(b&1)   return 1;
              return 0;
}

Answer 5

编辑：我找到了指向实际代码的链接：http://www.hackersdelight.org/hdcodetxt/nlz.c.txt 以下算法在该文件中命名为nlz8。你可以选择自己喜欢的黑客。

/*
From last comment of: http://stackoverflow.com/a/671826/315052
> Hacker's Delight explains how to correct for the error in 32-bit floats
> in 5-3 Counting Leading 0's. Here's their code, which uses an anonymous
> union to overlap asFloat and asInt: k = k & ~(k >> 1); asFloat =
> (float)k + 0.5f; n = 158 - (asInt >> 23); (and yes, this relies on
> implementation-defined behavior) - Derrick Coetzee Jan 3 '12 at 8:35
*/

unsigned char check (unsigned char b) {
    union {
        float    asFloat;
        int      asInt;
    } u;
    unsigned k = b & ~(b >> 1);
    u.asFloat = (float)k + 0.5f;
    return 32 - (158 - (u.asInt >> 23));
}

编辑 - 不完全确定提问者的语言独立性，但下面是python中的等效代码。

import ctypes

class Anon(ctypes.Union):
    _fields_ = [
        ("asFloat", ctypes.c_float),
        ("asInt", ctypes.c_int)
    ]

def check(b):
    k = int(b) & ~(int(b) >> 1)
    a = Anon(asFloat=(float(k) + float(0.5)))
    return 32 - (158 - (a.asInt >> 23))

确定字节中最有意义的位的位置

5 个答案: