我们有一个本机/托管进程,它在20-30个实例中连续运行,具有不同的配置。有时其中一个进程只是挂起,所有可以做的就是终止进程并重新启动它。
我有几个挂起的进程,问题似乎来自NLog异步日志写入程序线程,当StringBuilder
的构造函数被调用并且垃圾收集被初始化但由于某种原因GC从未完成并且每个线程仍然存在暂停。
转储线程:
ThreadCount: 17
UnstartedThread: 1
BackgroundThread: 15
PendingThread: 1
DeadThread: 0
Hosted Runtime: no
Lock
ID OSID ThreadOBJ State GC Mode GC Alloc Context Domain Count Apt Exception
0 1 2be8 000000adc21833d0 2a020 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 MTA
2 2 4a0c 000000adc218f270 2b220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 MTA (Finalizer)
3 3 3d44 000000adc221a490 102a220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 MTA (Threadpool Worker)
4 4 2c 000000addb585160 2b220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 MTA
6 10 4b1c 000000addb58ba90 1029220 Cooperative 0000000000000000:0000000000000000 000000adc2175630 3 MTA (GC) (Threadpool Worker)
8 12 48a4 000000addb5b0010 20220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 STA
9 13 1c18 000000addb66c960 1029220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 1 MTA (Threadpool Worker)
11 14 2578 000000addc8c2470 2b220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 MTA
12 15 36cc 000000addb5d1ae0 2b220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 MTA
14 16 3b38 000000addc971b60 1039220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 Ukn (Threadpool Worker)
15 18 17c8 000000ade3192bd0 1039220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 Ukn (Threadpool Worker)
16 9 28c0 000000ade3190c90 1039220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 Ukn (Threadpool Worker)
17 5 4b08 000000ade31986f0 1029220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 1 MTA (Threadpool Worker)
18 6 aa4 000000ade3191460 1039220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 Ukn (Threadpool Worker)
19 11 2120 000000ade318f520 8029220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 MTA (Threadpool Completion Port)
20 8 397c 000000ade3191c30 1039220 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 Ukn (Threadpool Worker)
22 7 1ee4 000000ade31904c0 1600 Preemptive 0000000000000000:0000000000000000 000000adc2175630 0 Ukn
可以看出,线程6启动了垃圾收集。 线程6的CLRStack:
OS Thread Id: 0x4b1c (6)
Child SP IP Call Site
000000addbe1e748 000007fa52532c6a [HelperMethodFrame: 000000addbe1e748]
000000addbe1e8c0 000007fa48877dff System.Text.StringBuilder..ctor(System.String, Int32, Int32, Int32)
000000addbe1e920 000007fa4894a93d System.Text.StringBuilder..ctor(System.String, Int32)
000000addbe1e960 000007f9ea49d9fd NLog.Layouts.SimpleLayout.GetFormattedMessage(NLog.LogEventInfo)
000000addbe1ea00 000007f9ea4ac36b NLog.Targets.FileTarget.GetBytesToWrite(NLog.LogEventInfo)
000000addbe1ea50 000007f9ea4aa58d NLog.Targets.FileTarget.Write(NLog.Common.AsyncLogEventInfo[])
000000addbe1ebc0 000007f9ea4a75b6 NLog.Targets.Target.WriteAsyncLogEvents(NLog.Common.AsyncLogEventInfo[])
000000addbe1ecd0 000007f9ea4a6918 NLog.Targets.Wrappers.AsyncTargetWrapper.ProcessPendingEvents(System.Object)
000000addbe1ed80 000007fa4894d0b5 System.Threading.ExecutionContext.RunInternal(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
000000addbe1eee0 000007fa4894ce19 System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
000000addbe1ef10 000007fa488ea7ee System.Threading.TimerQueueTimer.CallCallback()
000000addbe1ef80 000007fa488ea66e System.Threading.TimerQueueTimer.Fire()
000000addbe1eff0 000007fa488f4104 System.Threading.TimerQueue.FireNextTimers()
000000addbe1f528 000007fa49b1c4f3 [DebuggerU2MCatchHandlerFrame: 000000addbe1f528]
线程6的本机堆栈:
Child-SP RetAddr Call Site
000000ad`dbe1e2a8 000007fa`4f7a10ea ntdll!ZwWaitForSingleObject+0xa
000000ad`dbe1e2b0 000007fa`49ac831f KERNELBASE!WaitForSingleObjectEx+0x92
000000ad`dbe1e350 000007fa`49ac82d7 clr!CLREventBase::WaitEx+0x16c
000000ad`dbe1e390 000007fa`49ac8298 clr!CLREventBase::WaitEx+0x103
000000ad`dbe1e3f0 000007fa`49c6ace3 clr!CLREventBase::WaitEx+0x70
000000ad`dbe1e480 000007fa`49c6ac5c clr!WKS::gc_heap::create_bgc_thread+0x6f
000000ad`dbe1e4b0 000007fa`4a1352f9 clr!WKS::gc_heap::prepare_bgc_thread+0x32
000000ad`dbe1e4e0 000007fa`49c0db3d clr! ?? ::FNODOBFM::`string'+0xb74e1
000000ad`dbe1e570 000007fa`49c0e090 clr!WKS::GCHeap::GarbageCollectGeneration+0xdd
000000ad`dbe1e5c0 000007fa`49ac7e4e clr!WKS::GCHeap::Alloc+0x29d
000000ad`dbe1e610 000007fa`48877dff clr!JIT_NewArr1+0x49e
000000ad`dbe1e8c0 000007fa`4894a93d mscorlib_ni+0x497dff
000000ad`dbe1e920 000007f9`ea49d9fd mscorlib_ni+0x56a93d
000000ad`dbe1e960 000007f9`ea4ac36b 0x7f9`ea49d9fd
000000ad`dbe1ea00 000007f9`ea4aa58d 0x7f9`ea4ac36b
000000ad`dbe1ea50 000007f9`ea4a75b6 0x7f9`ea4aa58d
000000ad`dbe1ebc0 000007f9`ea4a6918 0x7f9`ea4a75b6
000000ad`dbe1ecd0 000007fa`4894d0b5 0x7f9`ea4a6918
000000ad`dbe1ed80 000007fa`4894ce19 mscorlib_ni+0x56d0b5
000000ad`dbe1eee0 000007fa`488ea7ee mscorlib_ni+0x56ce19
000000ad`dbe1ef10 000007fa`488ea66e mscorlib_ni+0x50a7ee
000000ad`dbe1ef80 000007fa`488f4104 mscorlib_ni+0x50a66e
000000ad`dbe1eff0 000007fa`49b1c4f3 mscorlib_ni+0x514104
000000ad`dbe1f090 000007fa`49b1c56e clr!CallDescrWorkerInternal+0x83
000000ad`dbe1f0d0 000007fa`49b1c5e0 clr!CallDescrWorkerWithHandler+0x4a
000000ad`dbe1f110 000007fa`49afb77f clr!MethodDescCallSite::CallTargetWorker+0x2e2
000000ad`dbe1f2c0 000007fa`49b52a2c clr!AppDomainTimerCallback_Worker+0x23
000000ad`dbe1f3a0 000007fa`49b529ba clr!Frame::Pop+0x50
000000ad`dbe1f3e0 000007fa`49b52931 clr!COMCustomAttribute::PopSecurityContextFrame+0x192
000000ad`dbe1f4e0 000007fa`49b52a8f clr!COMCustomAttribute::PopSecurityContextFrame+0xbd
000000ad`dbe1f570 000007fa`49afb715 clr!Thread::SetExposedContext+0x93
000000ad`dbe1f5d0 000007fa`49afb685 clr!AppDomainTimerCallback+0x6a
000000ad`dbe1f630 000007fa`49afb1c2 clr!ThreadpoolMgr::AsyncTimerCallbackCompletion+0x36
000000ad`dbe1f680 000007fa`49af9e8b clr!UnManagedPerAppDomainTPCount::DispatchWorkItem+0x11a
000000ad`dbe1f720 000007fa`49af9d8a clr!ThreadpoolMgr::ExecuteWorkRequest+0x4c
000000ad`dbe1f750 000007fa`49b1adde clr!ThreadpoolMgr::WorkerThreadStart+0xf6
000000ad`dbe1f810 000007fa`51961842 clr!Thread::intermediateThreadProc+0x7d
000000ad`dbe1fcd0 000007fa`5257e379 kernel32!BaseThreadInitThunk+0x1a
000000ad`dbe1fd00 00000000`00000000 ntdll!RtlUserThreadStart+0x1d
我们在NLog中有2个异步目标,一个文件目标和一个自定义Upd目标。 线程9和17也保持不同于0的锁定计数是NLog异步线程,线程9似乎写入文件,如线程6,线程17写入UDP目标。
线程9 CLRStack:
OS Thread Id: 0x1c18 (9)
Child SP IP Call Site
000000addd27e5d8 000007fa52532c6a [HelperMethodFrame: 000000addd27e5d8]
000000addd27e750 000007fa48877dff System.Text.StringBuilder..ctor(System.String, Int32, Int32, Int32)
000000addd27e7b0 000007fa4894a93d System.Text.StringBuilder..ctor(System.String, Int32)
000000addd27e7f0 000007f9ea49d9fd NLog.Layouts.SimpleLayout.GetFormattedMessage(NLog.LogEventInfo)
000000addd27e890 000007f9ea4ac36b NLog.Targets.FileTarget.GetBytesToWrite(NLog.LogEventInfo)
000000addd27e8e0 000007f9ea4aa58d NLog.Targets.FileTarget.Write(NLog.Common.AsyncLogEventInfo[])
000000addd27ea50 000007f9ea4a75b6 NLog.Targets.Target.WriteAsyncLogEvents(NLog.Common.AsyncLogEventInfo[])
000000addd27eb60 000007f9ea4a6918 NLog.Targets.Wrappers.AsyncTargetWrapper.ProcessPendingEvents(System.Object)
000000addd27ec10 000007fa4894d0b5 System.Threading.ExecutionContext.RunInternal(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
000000addd27ed70 000007fa4894ce19 System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
000000addd27eda0 000007fa488ea7ee System.Threading.TimerQueueTimer.CallCallback()
000000addd27ee10 000007fa488ea66e System.Threading.TimerQueueTimer.Fire()
000000addd27ee80 000007fa488926e3 System.Threading.QueueUserWorkItemCallback.System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()
000000addd27eed0 000007fa4888764a System.Threading.ThreadPoolWorkQueue.Dispatch()
000000addd27f428 000007fa49b1c4f3 [DebuggerU2MCatchHandlerFrame: 000000addd27f428]
线程17 CLRStack:
OS Thread Id: 0x4b08 (17)
Child SP IP Call Site
000000addbc1db78 000007fa52532c6a [HelperMethodFrame: 000000addbc1db78]
000000addbc1dcf0 000007fa48877dff System.Text.StringBuilder..ctor(System.String, Int32, Int32, Int32)
000000addbc1dd50 000007fa4894a93d System.Text.StringBuilder..ctor(System.String, Int32)
000000addbc1dd90 000007f9ea49d9fd NLog.Layouts.SimpleLayout.GetFormattedMessage(NLog.LogEventInfo)
000000addbc1de30 000007f9ea4aab5c MyCompany.Gelf.NLog.GelfTargetBase.BuildGelfMessage(NLog.LogEventInfo)
000000addbc1dec0 000007f9ea4aa7bc MyCompany.Gelf.NLog.DLM.GelfUdpTargetDLM.Write(NLog.LogEventInfo)
000000addbc1df30 000007f9ea4a9ec5 NLog.Targets.Target.Write(NLog.Common.AsyncLogEventInfo)
000000addbc1df90 000007f9ea4a9cfd NLog.Targets.Target.Write(NLog.Common.AsyncLogEventInfo[])
000000addbc1e000 000007f9ea4a75b6 NLog.Targets.Target.WriteAsyncLogEvents(NLog.Common.AsyncLogEventInfo[])
000000addbc1e110 000007f9ea4a6918 NLog.Targets.Wrappers.AsyncTargetWrapper.ProcessPendingEvents(System.Object)
000000addbc1e1c0 000007fa4894d0b5 System.Threading.ExecutionContext.RunInternal(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
000000addbc1e320 000007fa4894ce19 System.Threading.ExecutionContext.Run(System.Threading.ExecutionContext, System.Threading.ContextCallback, System.Object, Boolean)
000000addbc1e350 000007fa488ea7ee System.Threading.TimerQueueTimer.CallCallback()
000000addbc1e3c0 000007fa488ea66e System.Threading.TimerQueueTimer.Fire()
000000addbc1e430 000007fa488926e3 System.Threading.QueueUserWorkItemCallback.System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()
000000addbc1e480 000007fa4888764a System.Threading.ThreadPoolWorkQueue.Dispatch()
000000addbc1e9d8 000007fa49b1c4f3 [DebuggerU2MCatchHandlerFrame: 000000addbc1e9d8]
总结一下,NLog异步目标编写器线程在创建Stringbuilder
对象时需要更多内存,然后调用GC,之后没有任何反应。我们长期面对这个问题。如果有人能引导我们走向正确的方向,我将非常感激。
答案 0 :(得分:0)
.NET 4.6在垃圾收集器中存在错误,可能导致应用程序挂起。
https://blogs.msdn.microsoft.com/maoni/2015/08/12/gen2-free-list-changes-in-clr-4-6-gc/
这已经与4.6.1以及最新的Windows更新一起修复。