在实现 xxHash 时，我尝试用内联汇编（机器码）来实现“ROL”（循环左移）操作。这是我的测试代码：
/// <summary>
/// Micro-benchmark that compares a hand-written machine-code "rotate left" stub,
/// invoked through an unmanaged-function-pointer delegate, against the equivalent
/// managed shift/or expression.
/// </summary>
class Program
{
    /// <summary>
    /// Signature of the native stub: returns <c>x</c> rotated left by <c>y</c> bits.
    /// StdCall is stated explicitly because the 32-bit stub pops its own arguments
    /// (<c>ret 8</c>); the attribute has no effect on x64, which has one convention.
    /// </summary>
    [UnmanagedFunctionPointer(CallingConvention.StdCall)]
    private delegate uint RolFunc(uint x, int y);

    private static readonly byte[] _AsmCodeROL64 = new byte[] { 0x8B, 0xC1, 0x8B, 0xCA, 0xD3, 0xC0, 0xC3 };
    /*X64 ASM (Windows x64 convention: x in ecx, y in edx)
    8B C1 - mov eax,ecx
    8B CA - mov ecx,edx
    D3 C0 - rol eax,cl
    C3 - ret
    */

    // The value to rotate (first argument) is at [esp+04] and the count (second
    // argument) at [esp+08]; loading them the other way round computes rol(y, x).
    private static readonly byte[] _AsmCodeROL32 = new byte[] { 0x8B, 0x44, 0x24, 0x04, 0x8B, 0x4C, 0x24, 0x08, 0xD3, 0xC0, 0xC2, 0x08, 0x00 };
    /*X86 ASM (stdcall: x at [esp+04], y at [esp+08], callee pops 8 bytes)
    8B 44 24 04 - mov eax,[esp+04]
    8B 4C 24 08 - mov ecx,[esp+08]
    D3 C0 - rol eax,cl
    C2 0800 - ret 0008
    */

    /// <summary>Delegate bound to the machine-code stub matching the process bitness.</summary>
    private static readonly RolFunc AsmRol;

    // Keeps the code array pinned for the lifetime of the process. AsmRol holds a raw
    // address into the array, so the GC must never be allowed to move it.
    private static readonly GCHandle _asmCodeHandle;

    [DllImport("kernel32.dll", SetLastError = false, CallingConvention = CallingConvention.Winapi)]
    private static extern IntPtr GetCurrentProcess();

    // SetLastError = true so a failure can be reported with the Win32 error code.
    [DllImport("kernel32.dll", SetLastError = true, CallingConvention = CallingConvention.Winapi)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private unsafe static extern bool VirtualProtectEx([In] IntPtr hProcess, [In] void* lpAddress, [In, MarshalAs(UnmanagedType.SysUInt)] UIntPtr dwSize, [In] uint flNewProtect, [Out] out uint lpflOldProtect);

    private const uint PAGE_EXECUTE_READWRITE = 0x40;

    /// <summary>
    /// Managed reference implementation: rotates <paramref name="value"/> left by
    /// <paramref name="count"/> bits.
    /// </summary>
    /// <param name="value">The 32-bit value to rotate.</param>
    /// <param name="count">The number of bit positions to rotate by.</param>
    /// <returns>The rotated value.</returns>
    private static uint RotateLeft32(uint value, int count)
    {
        // C# uses only the low five bits of the shift count for 32-bit operands,
        // so count == 0 (where 32 - count == 32) still yields value unchanged.
        return (value << count) | (value >> (32 - count));
    }

    /// <summary>
    /// Pins the machine-code bytes for the current bitness, marks the memory that
    /// holds them executable, and binds <see cref="AsmRol"/> to that address.
    /// </summary>
    /// <exception cref="System.ComponentModel.Win32Exception">
    /// The memory protection could not be changed.
    /// </exception>
    static unsafe Program()
    {
        var asmCodeROL = IntPtr.Size == 4 ? _AsmCodeROL32 : _AsmCodeROL64;

        // A `fixed` statement pins only for its own scope; the delegate outlives this
        // constructor, so the array is pinned with a GCHandle that is never freed.
        _asmCodeHandle = GCHandle.Alloc(asmCodeROL, GCHandleType.Pinned);
        IntPtr funcAddr = _asmCodeHandle.AddrOfPinnedObject();

        // Protection is applied per page, so every page containing these bytes
        // becomes read/write/execute, not just the stub itself.
        bool madeExecutable = VirtualProtectEx(GetCurrentProcess(), funcAddr.ToPointer(), (UIntPtr)asmCodeROL.Length, PAGE_EXECUTE_READWRITE, out _);
        if (!madeExecutable)
        {
            int error = Marshal.GetLastWin32Error();
            _asmCodeHandle.Free();

            // Failing here is clearer than an access violation on the first AsmRol call.
            throw new System.ComponentModel.Win32Exception(error, "VirtualProtectEx could not make the ROL stub executable.");
        }

        AsmRol = Marshal.GetDelegateForFunctionPointer<RolFunc>(funcAddr);
    }

    /// <summary>
    /// Times three single calls of each implementation, alternating between them,
    /// followed by a 0x10000-iteration loop of each, and prints every elapsed time
    /// in milliseconds on its own line.
    /// </summary>
    static void Main(string[] args)
    {
        // NOTE(review): the results are never read, so the JIT may remove or inline
        // the managed calls, while each AsmRol call goes through a delegate to native
        // code; keep that in mind when comparing the numbers.
        var sw = Stopwatch.StartNew();
        var asmResult = AsmRol(10, 2);
        sw.Stop();
        Console.WriteLine(sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        var mtdResult = RotateLeft32(10, 2);
        sw.Stop();
        Console.WriteLine(sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        asmResult = AsmRol(11, 2);
        sw.Stop();
        Console.WriteLine(sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        mtdResult = RotateLeft32(11, 2);
        sw.Stop();
        Console.WriteLine(sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        asmResult = AsmRol(12, 2);
        sw.Stop();
        Console.WriteLine(sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        mtdResult = RotateLeft32(12, 2);
        sw.Stop();
        Console.WriteLine(sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        for (uint i = 0; i < 0x10000; i++)
        {
            asmResult = AsmRol(i, 2);
        }
        sw.Stop();
        Console.WriteLine(sw.Elapsed.TotalMilliseconds);

        sw.Restart();
        for (uint i = 0; i < 0x10000; i++)
        {
            mtdResult = RotateLeft32(i, 2);
        }
        sw.Stop();
        Console.WriteLine(sw.Elapsed.TotalMilliseconds);
    }
}
结果如下:
0.0108 0.4562 0.002 0.0008 0.0014 0.0008 8.892 0.9648
我发现内联汇编的代码长度比托管方法短得多，但为什么除了第一次调用之外，AsmRol 反而比 RotateLeft32 慢得多？