问题是
是否还有其他(和/或更快)的基本2log实现?
应用
log2(int)和log2(float)操作在许多不同的上下文中非常有用。仅举几例:压缩算法、3D引擎和机器学习。在几乎所有这些上下文中,它们都被用在会被调用数十亿次的底层代码中......尤其是log2(int)操作非常有用。
因为我发现自己一直在使用log2,所以我不想只给出我正在处理的某个特定应用程序。不变的事实是,它是一个真正的性能瓶颈(各种应用的性能测试都表明了这一点)。对我来说,让它尽可能快是关键。
在底部添加了测试所有实现的完整源代码,因此您可以自己查看。
当然......至少运行3次测试并确保计数器大到足以达到多秒。我也做'添加'操作,以确保整个循环不被JIT'ter神奇地删除。让我们开始真正的工作。
琐碎的实施
C#中2log的简单实现是:
(int)(Math.Log(x) / Math.Log(2))
这个实现很简单,但也很慢。它需要2个Log操作,这本身就很慢。当然,我们可以通过使1.0/Math.Log(2)
成为常量来优化这一点。
请注意,我们需要稍微修改此常量以获得正确的结果(作为浮点错误的结果)或添加一个小数字以获得正确的结果。我选择了后者,但这并不重要 - 最终结果在所有情况下都很慢。
表格查询
更快的解决方案是使用查找表。虽然查找表的大小可以是任何2的幂,但我通常使用256或64K个条目的表。
首先我们创建查找表:
// Build the 256-entry table with lookup[i] == floor(log2(i)) for 1 <= i < 256.
// Integer arithmetic only: each entry is one more than the entry for i / 2, so
// the result never depends on how Math.Log rounds near exact powers of two.
// lookup[0] and lookup[1] keep the array's default value 0.
lookup = new int[256];
for (int i = 2; i < 256; ++i)
{
    lookup[i] = lookup[i >> 1] + 1;
}
接下来,我们按如下方式实施2log:
/// <summary>
/// Returns floor(log2(i)) using the 256-entry <c>lookup</c> table: the value is
/// shifted right by whole bytes until its top byte fits the table, and the
/// shift amount is added back to the table result.
/// </summary>
private static int LogLookup(int i)
{
    int shift;
    if (i < 0x100)
    {
        shift = 0;
    }
    else if (i < 0x10000)
    {
        shift = 8;
    }
    else if (i < 0x1000000)
    {
        shift = 16;
    }
    else
    {
        shift = 24;
    }

    return lookup[i >> shift] + shift;
}
正如您所看到的,表查找是一个快得多的实现——但缺点是,它不能用于计算log2(float)。
删除分支
众所周知,处理器不是很擅长分支,所以我认为通过删除分支可以改进表查找。而不是if的串,我引入了第二个表,其中包含值和移位位以查找表中的条目:
// Shift amount (0, 8, 16 or 24) indexed by a 4-bit mask whose bit k is set
// when byte k of the input is non-zero; the highest set mask bit wins.
nobranch = new int[16] { 0, 0, 8, 8, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24 };
/// <summary>
/// Branch-free variant of the table lookup: a second table (<c>nobranch</c>)
/// picks the byte shift instead of an if/else chain.
/// </summary>
private static int LogDoubleLookup(int i)
{
    // OR every bit with the 7 bits above it, so bits 0, 8, 16 and 24 each
    // end up holding "is any bit of this byte set?".
    int smeared = i | (i >> 4);
    smeared |= smeared >> 2;
    smeared |= smeared >> 1;

    // Pack those four flag bits into a 4-bit index.
    int mask = (smeared & 1)
        | ((smeared & 0x100) >> 7)
        | ((smeared & 0x10000) >> 14)
        | ((smeared & 0x1000000) >> 21);

    int shift = nobranch[mask];
    return lookup[i >> shift] + shift;
}
如果你运行这个测试,你会发现它实际上比if-then-else解决方案慢。
然后是英特尔80386
英特尔多年前就知道这是一项重要的操作,因此他们在其处理器中实现了位扫描指令(Bit Scan Forward,BSF)。其他处理器也有类似的指令。这是迄今为止我所知道的计算2log最快的方法——但不幸的是,我不知道如何在C#中使用这些不错的指令……我不喜欢这样的想法:当新的平板电脑或手机上市时,我的实现就无法运行了——而且我不知道有任何跨平台的解决方案能让我直接使用这一功能。
其他实施
正如l4V指出的那样(谢谢!)还有其他一些实现,特别是:
TestTrivial
。TestFloat
TestDeBruijn
TestBinary
除了我喜欢这个名字之外,DeBruijn查找表和普通的查找表一样快,这使得它成为这里最快的算法之一......我尝试过的所有其他算法都慢得多。
完整的测试代码
/// <summary>
/// Console benchmark that times several floor(log2(int)) implementations.
/// Every Test* method sums its implementation over i = 1 .. 99,999,999 and
/// prints that sum together with the elapsed time; printing the sum keeps the
/// loop from being removed as dead code.
/// </summary>
public class Log2Test
{
    /// <summary>Exclusive upper bound of every benchmark loop.</summary>
    private const int Limit = 100000000;

    /// <summary>
    /// lookup[i] == floor(log2(i)) for 1 &lt;= i &lt; 256; entry 0 is 0.
    /// Initialised eagerly so that LogLookup and LogDoubleLookup work no matter
    /// which Test* method runs first.
    /// </summary>
    private static int[] lookup = BuildLookup();

    /// <summary>
    /// Byte shift (0, 8, 16 or 24) indexed by a 4-bit mask whose bit k is set
    /// when byte k of the input is non-zero.
    /// </summary>
    private static readonly int[] nobranch = new int[16] { 0, 0, 8, 8, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24, 24 };

    /// <summary>Result table for the De Bruijn multiply-and-shift lookup in LogDeBruijn.</summary>
    private static readonly int[] MultiplyDeBruijnBitPosition = new int[32]
    {
        0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
        8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
    };

    /// <summary>
    /// Builds the 256-entry floor(log2) table with integer arithmetic only
    /// (entry i is one more than entry i / 2), so it cannot be affected by
    /// floating-point rounding in Math.Log.
    /// </summary>
    private static int[] BuildLookup()
    {
        int[] table = new int[256];
        for (int i = 2; i < table.Length; ++i)
        {
            table[i] = table[i >> 1] + 1;
        }
        return table;
    }

    /// <summary>Times (int)(Math.Log(i) / Math.Log(2.0)), evaluating both logarithms per iteration.</summary>
    public static void TestNaive()
    {
        Stopwatch sw = Stopwatch.StartNew();
        int n = 0;
        for (int i = 1; i < Limit; ++i)
        {
            n += (int)(Math.Log(i) / Math.Log(2.0));
        }
        sw.Stop();
        Console.WriteLine("Result: {0} - naive implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
    }

    /// <summary>
    /// Returns floor(log2(v)) by counting how many times v can be shifted right
    /// before it is no longer positive. Returns 0 for v &lt;= 1.
    /// </summary>
    public static int LogTrivialLoop(int v)
    {
        int r = 0;
        while ((v >>= 1) > 0) // unroll for more speed...
        {
            r++;
        }
        return r;
    }

    /// <summary>Times LogTrivialLoop.</summary>
    public static void TestTrivialLoop()
    {
        Stopwatch sw = Stopwatch.StartNew();
        int n = 0;
        for (int i = 1; i < Limit; ++i)
        {
            n += LogTrivialLoop(i);
        }
        sw.Stop();
        Console.WriteLine("Result: {0} - loop implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
    }

    /// <summary>
    /// Returns floor(log2(v)) by reading the exponent field of a double.
    /// </summary>
    public static int LogFloat(int v)
    {
        // High word 0x43300000 is the exponent of 2^52, so with v in the low
        // word the double is 2^52 + v; subtracting 2^52 leaves v normalised.
        // NOTE(review): U1 at offset 0 being the low word assumes a
        // little-endian host - confirm before using elsewhere.
        Helper h = new Helper() { U1 = v, U2 = 0x43300000 };
        h.D -= 4503599627370496.0; // 2^52

        // Biased exponent sits in bits 20..30 of the high word; remove the bias.
        return (h.U2 >> 20) - 0x3FF;
    }

    /// <summary>Times LogFloat.</summary>
    public static void TestFloat()
    {
        Stopwatch sw = Stopwatch.StartNew();
        int n = 0;
        for (int i = 1; i < Limit; ++i)
        {
            n += LogFloat(i);
        }
        sw.Stop();
        Console.WriteLine("Result: {0} - IEEE float implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
    }

    /// <summary>Overlays two 32-bit ints (U1 at offset 0, U2 at offset 4) on one double.</summary>
    [StructLayout(LayoutKind.Explicit)]
    private struct Helper
    {
        [FieldOffset(0)]
        public int U1;
        [FieldOffset(4)]
        public int U2;
        [FieldOffset(0)]
        public double D;
    }

    /// <summary>
    /// Times Math.Log(i) multiplied by a precomputed 1 / ln(2); a small epsilon
    /// is added before truncation to compensate for floating-point error.
    /// </summary>
    public static void TestConstant()
    {
        double c = 1.0 / Math.Log(2.0);
        Stopwatch sw = Stopwatch.StartNew();
        int n = 0;
        for (int i = 1; i < Limit; ++i)
        {
            n += (int)(0.00000000001 + Math.Log(i) * c);
        }
        sw.Stop();
        Console.WriteLine("Result: {0} - naive 2 implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
    }

    /// <summary>
    /// Returns floor(log2(i)) by shifting i right by whole bytes until its top
    /// byte indexes the 256-entry table, then adding the shift back.
    /// </summary>
    private static int LogLookup(int i)
    {
        if (i >= 0x1000000) { return lookup[i >> 24] + 24; }
        else if (i >= 0x10000) { return lookup[i >> 16] + 16; }
        else if (i >= 0x100) { return lookup[i >> 8] + 8; }
        else { return lookup[i]; }
    }

    /// <summary>Times LogLookup. The table is rebuilt first, outside the timed region.</summary>
    public static void TestLookup()
    {
        lookup = BuildLookup();
        Stopwatch sw = Stopwatch.StartNew();
        int n = 0;
        for (int i = 1; i < Limit; ++i)
        {
            n += LogLookup(i);
        }
        sw.Stop();
        Console.WriteLine("Result: {0} - table lookup implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
    }

    /// <summary>
    /// Branch-free variant of LogLookup: the byte shift comes from the nobranch
    /// table instead of an if/else chain.
    /// </summary>
    private static int LogDoubleLookup(int i)
    {
        // OR every bit with the 7 bits above it, so bits 0, 8, 16 and 24 each
        // hold "is any bit of this byte set?".
        int n = (i | (i >> 4));
        n = (n | (n >> 2));
        n = (n | (n >> 1));

        // Pack those four flags into a 4-bit index.
        n = ((n & 0x1000000) >> 21) | ((n & 0x10000) >> 14) | ((n & 0x100) >> 7) | (n & 1);
        int br = nobranch[n];
        return lookup[i >> br] + br;
    }

    /// <summary>Times LogDoubleLookup. The table comes from the static initialiser.</summary>
    public static void TestDoubleLookup()
    {
        Stopwatch sw = Stopwatch.StartNew();
        int n = 0;
        for (int i = 1; i < Limit; ++i)
        {
            n += LogDoubleLookup(i);
        }
        sw.Stop();
        Console.WriteLine("Result: {0} - double table lookup implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
    }

    /// <summary>
    /// Returns floor(log2(v)) by binary search over the bit position: each step
    /// tests whether the value exceeds a threshold and, if so, shifts it down
    /// and records the shift in the result.
    /// </summary>
    private static int LogBinary(int v)
    {
        // The original author noted that a table-driven loop form of this
        // search was even slower; only the unrolled form is kept.
        int r = (v > 0xFFFF) ? 0x10 : 0;
        v >>= r;
        int shift = (v > 0xFF) ? 0x8 : 0;
        v >>= shift;
        r |= shift;
        shift = (v > 0xF) ? 0x4 : 0;
        v >>= shift;
        r |= shift;
        shift = (v > 0x3) ? 0x2 : 0;
        v >>= shift;
        r |= shift;
        r |= (v >> 1);
        return r;
    }

    /// <summary>Times LogBinary.</summary>
    public static void TestBinary()
    {
        Stopwatch sw = Stopwatch.StartNew();
        int n = 0;
        for (int i = 1; i < Limit; ++i)
        {
            n += LogBinary(i);
        }
        sw.Stop();
        Console.WriteLine("Result: {0} - binary search implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
    }

    /// <summary>
    /// Returns floor(log2(v)) with the De Bruijn multiply-and-shift technique.
    /// </summary>
    private static int LogDeBruijn(int v)
    {
        v |= v >> 1; // first round down to one less than a power of 2
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;

        // int * uint is evaluated as long; the cast keeps the low 32 bits.
        // 'unchecked' makes that truncation explicit so the method also works
        // when the project is compiled with overflow checking enabled.
        return MultiplyDeBruijnBitPosition[unchecked((uint)(v * 0x07C4ACDDU)) >> 27];
    }

    /// <summary>Times LogDeBruijn.</summary>
    public static void TestDeBruijn()
    {
        Stopwatch sw = Stopwatch.StartNew();
        int n = 0;
        for (int i = 1; i < Limit; ++i)
        {
            n += LogDeBruijn(i);
        }
        sw.Stop();
        Console.WriteLine("Result: {0} - de Bruijn implementation took {1:0.000}s", n, sw.Elapsed.TotalSeconds);
    }

    /// <summary>Runs every benchmark once, then waits for Enter.</summary>
    static void Main(string[] args)
    {
        TestConstant();
        TestNaive();
        TestDeBruijn();
        TestBinary();
        TestFloat();
        TestTrivialLoop();
        TestLookup();
        TestDoubleLookup();
        Console.ReadLine();
    }
}
答案 0 :(得分:3)
There are some integer algorithms here
在C#中:
/// <summary>
/// Returns floor(log2(x)). For x == 0 the subtraction wraps, so the result is
/// uint.MaxValue in the default unchecked context.
/// </summary>
public static uint FloorLog2(uint x)
{
    // Copy the highest set bit into every lower position; the number of set
    // bits is then the bit length of the original value.
    for (int shift = 1; shift <= 16; shift <<= 1)
    {
        x |= x >> shift;
    }

    int bitLength = NumBitsSet(x);
    return (uint)(bitLength - 1);
}

/// <summary>
/// Returns ceil(log2(x)): the floor value, plus one when x is not an exact
/// power of two.
/// </summary>
public static uint CeilingLog2(uint x)
{
    // roundUp becomes -1 (all ones) when x has more than one bit set, else 0:
    // a non-zero value OR-ed with its negation always has the sign bit set.
    int roundUp = (int)(x & (x - 1));
    roundUp |= -roundUp;
    roundUp >>= (WORDBITS - 1);

    for (int shift = 1; shift <= 16; shift <<= 1)
    {
        x |= x >> shift;
    }

    int bitLength = NumBitsSet(x);
    return (uint)(bitLength - 1 - roundUp);
}

/// <summary>Returns the number of set bits in x (parallel bit count).</summary>
public static int NumBitsSet(uint x)
{
    // Sum adjacent bits into 2-bit fields, then 4-bit fields, then bytes.
    uint pairs = x - ((x >> 1) & 0x55555555u);
    uint nibbles = ((pairs >> 2) & 0x33333333u) + (pairs & 0x33333333u);
    uint bytes = ((nibbles >> 4) + nibbles) & 0x0f0f0f0fu;

    // Fold the four byte counts together; the total fits in the low 6 bits.
    uint folded = bytes + (bytes >> 8);
    folded += folded >> 16;
    return (int)(folded & 0x0000003fu);
}

// Width of the integer type in bits; used for the sign-extending shift above.
private const int WORDBITS = 32;
你应该查看我为上下文链接的网站上的原始代码,特别是Log2(0)会发生什么。
答案 1 :(得分:3)
采用已提及的二进制解决方案并删除分支。做了一些测试,结果比DeBruijn快1.3倍。
/// <summary>
/// Branch-free binary search for floor(log2(v)). Each step computes
/// (threshold - v) >> 31, which is all ones exactly when v exceeds the
/// threshold, and masks it down to the shift width for that step.
/// </summary>
public static int Log2(int v)
{
    int result = ((0xFFFF - v) >> 31) & 0x10;
    v >>= result;

    int step = ((0xFF - v) >> 31) & 0x8;
    v >>= step;
    result |= step;

    step = ((0xF - v) >> 31) & 0x4;
    v >>= step;
    result |= step;

    step = ((0x3 - v) >> 31) & 0x2;
    v >>= step;
    result |= step;

    // v is now in 0..3; its bit 1 is the last result bit.
    return result | (v >> 1);
}
答案 2 :(得分:2)
有关更多算法,请查看此处http://www.asmcommunity.net/forums/topic/?id=15010
还在C ++中进行了一些测试,我的BSR实现比查找表慢
代码:
//---------------------------------------------------------------------------
// Reference implementation: floor(log2(x)) by doubling a power of two until it
// reaches x. Returns 0 for x == 0.
DWORD log2_slow(const DWORD &x)
{
    if (x == 0) return 0;
    // Handled up front: the doubling loop below would wrap to 0 before
    // reaching a value with bit 31 set.
    if (x >= 0x80000000) return 31;

    DWORD power = 1;
    DWORD exponent = 0;
    while (power < x)
    {
        power <<= 1;
        exponent++;
    }
    // The loop stops at the first power >= x; step back unless x is exact.
    if (power != x) exponent--;
    return exponent;
}
//---------------------------------------------------------------------------
// floor(log2(x)) via the x86 BSR instruction: loads x into EAX, executes
// BSR EAX,EAX and returns the resulting EAX value.
// NOTE(review): per the x86 manuals BSR leaves its destination undefined for a
// zero source - confirm callers never rely on the result for x == 0.
// NOTE(review): the `asm { ... }` block syntax is compiler-specific - confirm
// the target toolchain accepts it.
DWORD log2_asm(const DWORD &x)
{
DWORD xx=x;
asm {
mov eax,xx
bsr eax,eax;
mov xx,eax;
}
return xx;
}
//---------------------------------------------------------------------------
BYTE _log2[2048]=
{
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
};
// floor(log2(x)) using the 2048-entry (11-bit) _log2 table: x is shifted right
// by 22 or 11 bits until it fits the table, and the shift is added back.
DWORD log2(const DWORD &x)
{
    DWORD shift = 0;
    if (x >= 0x00400000)      shift = 22;
    else if (x >= 0x00000800) shift = 11;
    return _log2[x >> shift] + shift;
}
//---------------------------------------------------------------------------
测试代码:
// Benchmark driver: for each implementation, evaluate it on j<<i for every
// j in 0..n-1 and every shift i in 0..31, then append one line to mm_log.
// NOTE(review): tbeg()/tend()/tstr() are presumably start-timer, stop-timer and
// format-elapsed helpers defined elsewhere - confirm.
// NOTE(review): j == 0 is included, so every implementation is also called
// with x == 0.
DWORD x,j,i,n=256;
tbeg(); for (i=0;i<32;i++) for (j=0;j<n;j++) x=log2 (j<<i); tend(); mm_log->Lines->Add(tstr(1));
tbeg(); for (i=0;i<32;i++) for (j=0;j<n;j++) x=log2_asm (j<<i); tend(); mm_log->Lines->Add(tstr(1));
tbeg(); for (i=0;i<32;i++) for (j=0;j<n;j++) x=log2_slow(j<<i); tend(); mm_log->Lines->Add(tstr(1));
我对AMD A8-5500 3.2 GHz的结果:
[ 0.040 ms] log2 (x) - 11bit lookup table
[ 0.060 ms] log2_asm (x) - BSR
[ 0.415 ms] log2_slow(x) - shift loop
注意:
答案 3 :(得分:1)
select doc_unique_id from docs ORDER BY CAST(SUBSTRING_INDEX(doc_unique_id, '-', -1) AS UNSIGNED) ASC;
答案 4 :(得分:1)
这是C#的log2(int)最快的实现:
/// <summary>
/// Union-style overlay used to read the raw bits of a floating-point value.
/// All fields share offset 0.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
private struct ConverterStruct
{
    [FieldOffset(0)] public int asInt;
    [FieldOffset(0)] public float asFloat;
    [FieldOffset(0)] public long asLong;
    [FieldOffset(0)] public double asDouble;
}

/// <summary>
/// Returns floor(log2(val)) for val &gt; 0 by reading the exponent field of
/// val converted to a double. Zero is not supported: it yields 1.
/// </summary>
/// <remarks>
/// A double is used rather than a float because a float has only 24
/// significant bits: larger values just below a power of two (for example
/// 33554431 = 2^25 - 1) round up during conversion and report an exponent one
/// too high. Every uint is exactly representable as a double.
/// </remarks>
public static int Log2(uint val)
{
    ConverterStruct a; a.asLong = 0; a.asDouble = val;

    // Bits 52..62 hold the exponent biased by 1023. Adding 1 makes the bias a
    // multiple of 32, so masking to 5 bits leaves the unbiased exponent 0..31.
    return ((int)(a.asLong >> 52) + 1) & 0x1F;
}
注意: 利用浮点数指数位的灵感来自SPWorley(2009年3月22日)。在生产代码中请谨慎使用,因为这在非小端字节序的架构上可能会失败。
如果您想要安全的“字节顺序”,请查看spender 5/3/2012。它还具有零支持。
以下是一些基准:(此处的代码:https://github.com/SunsetQuest/Fast-Integer-Log2)
Function Time1 Time2 Errors Full-32-Bit Zero_Support
Log2_SunsetQuest3: 18 18 0 (Y) (N)
Log2_SunsetQuest4: 18 18 0 (Y) (N)
Log2_SPWorley: 18 18 0 (Y) (N)
MostSigBit_spender: 20 19 0 (Y) (Y)
Log2_HarrySvensson: 26 29 0 (Y) (N)
Log2_WiegleyJ: 27 23 0 (Y) (N)
Log2_DanielSig: 28 24 3125 (N) (N)
FloorLog2_Matthew_Watson: 29 25 0 (Y) (Y)
Log2_SunsetQuest1: 31 28 0 (Y) (Y)
HighestBitUnrolled_Kaz: 33 33 3125 (Y) (Y)
Log2_Flynn1179: 58 52 0 (Y) (N)
GetMsb_user3177100: 58 53 0 (Y) (N)
Log2floor_greggo: 89 101 0 (Y) (Y)
FloorLog2_SN17: 102 43 0 (Y) (N)
Log2_SunsetQuest2: 118 140 0 (Y) (Y)
Log2_Papayaved: 125 60 0 (Y) (N)
Msb_Protagonist: 136 118 0 (Y) (N)
Log2_SunsetQuest0: 206 202 0 (Y) (Y)
BitScanReverse2: 228 240 3125 (N) (Y)
UsingStrings_Rob: 2346 1494 0 (Y) (N)
Zero_Support = Supports Neg Return on Zero
Full-32-Bit = Supports full 32-bit (some just support 31 bits)
Time1 = benchmark for sizes up to 32-bit (same number tried for each size)
Time2 = benchmark for sizes up to 16-bit (for measuring perf with small numbers)
Benchmark notes: AMD Ryzen CPU, Release mode, no-debugger attached, .net core 2.1
答案 5 :(得分:1)
已经有很多答案提供了对 log2(int) 的快速近似方法,但针对 log2(float) 的答案却没有几个,因此这里给出两个(Java 实现),它们同时使用了查找表和尾数/位操作技巧:
注意:
/**
 * Calculate the logarithm to base 2, handling special cases.
 *
 * <p>The float is split into its biased exponent {@code e} and 23-bit
 * mantissa {@code m}; the fractional part of the result comes from the
 * {@code data} lookup table, indexed by the top mantissa bits.
 *
 * @param x the argument
 * @return an approximation of log2(x); NaN for NaN, negative non-zero or
 *         negative-infinite input; positive infinity for positive infinity;
 *         negative infinity for +0.0 and -0.0
 */
public static float log2(float x) {
    final int bits = Float.floatToRawIntBits(x);
    final int e = (bits >> 23) & 0xff;
    final int m = (bits & 0x7fffff);
    final boolean negative = (bits >> 31) != 0;
    if (e == 255) {
        // Exponent all ones: NaN (m != 0) or an infinity (m == 0).
        if (m != 0) {
            return Float.NaN;
        }
        return negative ? Float.NaN : Float.POSITIVE_INFINITY;
    }
    if (negative) {
        // -0.0 is treated like zero; every other negative value has no real log.
        return (e == 0 && m == 0) ? Float.NEGATIVE_INFINITY : Float.NaN;
    }
    // Subnormals (e == 0) have no implicit leading one and are scaled by one
    // extra power of two, so they index the table with the mantissa doubled.
    // (m << 1) >>> q equals m >>> (q - 1) for q >= 1 and stays correct for
    // q == 0, where a shift by q - 1 == -1 would be masked by Java to 31.
    return (e == 0 ? data[(m << 1) >>> q] : e + data[((m | 0x00800000) >>> q)]);
}
第二种方法放弃了另一种方法中存在的检查,因此具有以下特殊情况:
这两种方法都依赖于查找表/**
* Calculate the logarithm using base 2. Requires the argument be finite and
* positive.
*/
public static float fastLog2(float x) {
    // Split the float into its biased exponent e and 23-bit mantissa m; no
    // special-case checks are performed here.
    final int bits = Float.floatToRawIntBits(x);
    final int e = (bits >> 23) & 0xff;
    final int m = (bits & 0x7fffff);
    // Subnormals (e == 0) index the table with the mantissa doubled.
    // (m << 1) >>> q equals m >>> (q - 1) for q >= 1 and stays correct for
    // q == 0, where a shift by q - 1 == -1 would be masked by Java to 31.
    return (e == 0 ? data[(m << 1) >>> q] : e + data[((m | 0x00800000) >>> q)]);
}
data(以及变量 q 和 qm1)。这些使用以下方法填充。n 定义了精度与空间之间的权衡。
// Shift that reduces a 24-bit mantissa to a table index, and that shift minus one.
static int q, qm1;
// data[i] == log2(i << q) - 150; entry 0 is negative infinity.
static float[] data;
/**
 * Fill the lookup table used by the log2 approximations.
 *
 * @param n The number of bits to keep from the mantissa. Table storage =
 *          2^(n+1) * 4 bytes, e.g. 64Kb for n=13. Must be in the range
 *          0<=n<=23
 */
public static void populateLUT(int n) {
    final int entries = 1 << (n + 1);
    final double ln2 = Math.log(2);
    q = 23 - n;
    qm1 = q - 1;
    data = new float[entries];
    int idx = 0;
    while (idx < entries) {
        // Logarithm of the mantissa value this slot stands for, minus 150.
        data[idx] = (float) (Math.log(idx << q) / ln2) - 150;
        idx++;
    }
}
答案 6 :(得分:0)
这是我用过的东西:
/*
 * Return floor(log2(n)): the index of the highest set bit of n.
 * Returns 0 for n == 0 and n == 1.
 *
 * The loop shifts n right until nothing is left and counts the shifts.
 * (The previous loop condition tested the lowest bit, which counted trailing
 * one bits instead and gave e.g. 0 for n == 8.)
 */
unsigned log2(register unsigned n) {
    register unsigned i;
    for (i = 0; (n >>= 1) != 0; i++);
    return i;
}
编辑: 检查这些(ffz变体):https://patchwork.kernel.org/patch/8845631/
答案 7 :(得分:0)
$scorecard$player_1$scoring$rounds$round_1 = $dice_hand;
答案 8 :(得分:0)
(我没有做任何测量,所以可能不匹配,但是我认为用户user9337139的想法很简洁,想在C#中尝试同样的方法-他是C ++)。
这是一个C#int Magnitude(byte)
函数,其功能是将字节值转换为浮点并从IEEE float representation中提取指数。
using System.Runtime.InteropServices;
/// <summary>Overlays an int and a float at the same offset to expose the float's raw bits.</summary>
[StructLayout(LayoutKind.Explicit)]
struct UnionWorker
{
    [FieldOffset(0)]
    public int i;
    [FieldOffset(0)]
    public float f;
}

/// <summary>
/// Returns the number of significant bits in <paramref name="b"/> (0 for 0,
/// 8 for 0xFF) by converting it to a float and reading the exponent field.
/// </summary>
static int Magnitude(byte b)
{
    UnionWorker u = default(UnionWorker);
    u.f = b;

    // Bits 23..30 of the float hold the exponent biased by 127.
    int biasedExponent = (u.i >> 23) & 0xFF;

    // Zero has a biased exponent of 0; clamp so it yields 0 instead of -126.
    if (biasedExponent < 126)
    {
        return 0;
    }
    return biasedExponent - 126;
}
对零返回零,对0xFF返回8,对其他值返回您所期望的结果。
零是一个特例,因此我需要使用Math.Max
钳位。我怀疑user9337139的解决方案可能有类似的问题。
请注意,此代码尚未针对字节序问题进行过测试——使用者请自行承担风险。
答案 9 :(得分:0)
清洁可靠,快速!
(需要.net core 3或更高版本)
// NOTE(review): BitOperations is presumably System.Numerics.BitOperations, whose
// Log2 overloads take uint/ulong - confirm the type of x and the using directive.
int val = BitOperations.Log2(x);