在 SQL CLR 程序集中测试性能时,调用非托管(unmanaged)代码似乎极其缓慢。
我分别以控制台应用程序和 SQL CLR 程序集两种方式进行了测试,结果如下:
控制台应用:
CLR 程序集:
Pinvoke.Empty() 是一个只包含 ret 指令的汇编函数,其代码所在的内存是通过 VirtualAlloc 分配的。
/// <summary>
/// Builds a minimal native function at type-initialization time and exposes it as a
/// managed delegate, so the cost of a managed-to-unmanaged call can be measured.
/// </summary>
/// <remarks>
/// The machine code is written into memory obtained from <c>VirtualAlloc</c>, which is then
/// switched to execute-only with <c>VirtualProtect</c>. All native calls target kernel32.dll,
/// so the type initializer only works on Windows.
/// </remarks>
public static class Pinvoke
{
    /// <summary>
    /// Signature of the generated native stub: takes no arguments and returns a 64-bit value.
    /// </summary>
    [SuppressUnmanagedCodeSecurity]
    [UnmanagedFunctionPointer(CallingConvention.StdCall)]
    public delegate ulong FuncUInt64();

    // Win32 memory-protection and allocation flags (names mirror the Win32 headers).
    private const uint PAGE_EXECUTE = 0x10;
    private const uint PAGE_EXECUTE_READWRITE = 0x40;
    private const uint MEM_COMMIT = 0x1000;
    private const uint MEM_RESERVE = 0x2000;
    private const uint MEM_RELEASE = 0x8000;

    // The stub is padded up to a multiple of this many bytes.
    private const int CacheLineSize = 64;

    // Single-byte filler written after the stub's code.
    private const byte Nop = 0x90;

    /// <summary>
    /// Delegate bound to the native stub whose code is <c>ReturnOnlyAsm</c>.
    /// The stub consists of a single return opcode and sets no result, so the
    /// <see cref="ulong"/> it yields should be treated as meaningless.
    /// </summary>
    public static readonly FuncUInt64 Empty;

    // Machine code of the stub: 0xC3 (ret).
    private static readonly byte[] ReturnOnlyAsm = { 0xC3 };

    /// <summary>
    /// Allocates executable memory, copies the stub into it, pads the remainder with NOPs and
    /// binds <see cref="Empty"/> to the result.
    /// </summary>
    /// <exception cref="Win32Exception">
    /// <c>VirtualAlloc</c> or <c>VirtualProtect</c> failed.
    /// </exception>
    static Pinvoke()
    {
        var codePtr = IntPtr.Zero;
        try
        {
            // Round the stub size up to the next multiple of CacheLineSize.
            var paddedLength = (ReturnOnlyAsm.Length + (CacheLineSize - 1)) & ~(CacheLineSize - 1);

            // Reserve and commit in one step; this is the documented form when no base address is given.
            codePtr = VirtualAlloc(IntPtr.Zero, (IntPtr) paddedLength, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
            if (codePtr == IntPtr.Zero)
            {
                throw new Win32Exception();
            }

            Marshal.Copy(ReturnOnlyAsm, 0, codePtr, ReturnOnlyAsm.Length);
            for (var offset = ReturnOnlyAsm.Length; offset < paddedLength; offset++)
            {
                Marshal.WriteByte(codePtr, offset, Nop);
            }

            // The code is in place: drop write access and leave the region execute-only.
            if (!VirtualProtect(codePtr, (IntPtr) paddedLength, PAGE_EXECUTE, out _))
            {
                throw new Win32Exception();
            }

            Empty = (FuncUInt64) Marshal.GetDelegateForFunctionPointer(codePtr, typeof(FuncUInt64));

            // Ownership now rests with the delegate: clear the pointer so the finally block
            // does not free memory that Empty still points at.
            codePtr = IntPtr.Zero;
        }
        finally
        {
            // Only reached with a non-zero pointer when something above threw.
            if (codePtr != IntPtr.Zero)
            {
                VirtualFree(codePtr, IntPtr.Zero, MEM_RELEASE);
            }
        }
    }

    [DllImport("kernel32.dll", ExactSpelling = true, SetLastError = true)]
    private static extern IntPtr VirtualAlloc(IntPtr lpAddress, IntPtr dwSize, uint flAllocationType, uint flProtect);

    [DllImport("kernel32.dll", ExactSpelling = true, SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private static extern bool VirtualProtect(IntPtr lpAddress, IntPtr dwSize, uint flNewProtect, out uint lpflOldProtect);

    [DllImport("kernel32.dll", ExactSpelling = true, SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private static extern bool VirtualFree(IntPtr lpAddress, IntPtr dwSize, uint dwFreeType);
}
十进制数解析(Decimal parse)不涉及非托管代码,而 GetTimestamp 涉及。调用非托管代码似乎慢了 250 多倍。
测试所用的代码请参阅 https://github.com/Anderman/SqlClrPerformance 。此代码使用汇编技巧调用 rdtscp 计时指令,并且只能在 64 位环境下运行。
测试环境为 SQL Server 2017(13.0.1742.0),SQL CLR 程序集和控制台应用均使用 Release(发布)版本进行测试。
我还使用 Stopwatch 进行了测试,虽然精度较低,但显示出同样的问题。
/// <summary>
/// Calls <see cref="Process.GetCurrentProcess"/> in a loop for about five seconds and reports
/// the average time per iteration, in nanoseconds, through <c>_reporter</c>.
/// </summary>
/// <remarks>
/// Each iteration also polls <see cref="Stopwatch.ElapsedMilliseconds"/>, so the reported
/// figure includes the cost of that check.
/// </remarks>
private void swGetCurrentProcess()
{
    long iterations = 0;
    var sw = Stopwatch.StartNew();
    do
    {
        iterations++;
        Process.GetCurrentProcess();
    } while (sw.ElapsedMilliseconds < 5000);
    sw.Stop();

    // Use the time that actually elapsed (the loop always overshoots 5000 ms slightly) and
    // divide in floating point so the "0.0" format shows a real fractional digit.
    var nanoSecondsPerIteration = sw.Elapsed.TotalMilliseconds * 1_000_000.0 / iterations;
    _reporter($"GetCurrentProcess with StopWatch {nanoSecondsPerIteration,5:0.0} ns ");
}
控制台:
GetCurrentProcess with StopWatch 426.0 ns
SQLCLR:
GetCurrentProcess with StopWatch 12422.0 ns