今天我们遇到了一个非常有趣的案例,让我很头疼。简而言之,我们做了一些重构:清理触发器中的重复代码,将其提取到单个可重用的存储过程中。我们原以为这种重构不会有副作用,但我们错了。发布之后,我们在没有明显原因的情况下遇到了大量死锁和性能下降。在检查系统表以查看是什么占用了数据库之后,我们发现问题与上述重构有关,最终回滚了这次更新。
我们无法在测试环境中重现该问题以寻找解决办法,看来需要一些比较苛刻的条件才能让问题显现出来。
以下是变更内容的详细信息。我们更新了很多触发器,但它们都非常相似,所以我只展示其中一个。这应该足够了,因为我找到的死锁图显示,死锁发生在两个正在执行同一个触发器(如下所示)的进程之间。
让我先从之前有效的解决方案开始(我们回滚到了这个方案,它看起来与下面所示的发生死锁的方案几乎完全相同)。
-- Original (pre-refactoring) trigger on dbo.xyz.
-- After any INSERT, DELETE or UPDATE it collects the affected ids into a temp
-- table and then upserts one tracking row per id into sync_publishers:
-- existing rows get master_update_date refreshed, missing rows are inserted.
CREATE TRIGGER [dbo].[TR__xyz__update_sync_publishers]
ON [dbo].[xyz]
AFTER INSERT, DELETE, UPDATE
AS
BEGIN
SET NOCOUNT ON;
-- Run the body only when exactly one trigger is on the call stack, i.e. this
-- trigger was not fired from inside another trigger.
if(TRIGGER_NESTLEVEL() = 1)
BEGIN
-- Work table of distinct affected ids; the primary key gives it a unique index.
create table #AffectedIDs (advisor_id int primary key)
-- NOTE(review): both UNION branches below are identical and read only
-- `inserted`; the second one was presumably meant to read `deleted`, since the
-- trigger also fires on DELETE - confirm against the real trigger.
-- NOTE(review): the unqualified `id` exists in both `inserted` and `xyz`;
-- presumably an artifact of anonymizing the listing - confirm.
-- UNION already removes duplicates, so the outer DISTINCT adds nothing.
insert into #AffectedIDs
select distinct t.id
from
(select id
from inserted
inner join xyz a ON a.id = id
where [text] <> ''
union
select id
from inserted
inner join xyz a ON a.id = id
where [text] <> '') t
-- One timestamp shared by the UPDATE and the INSERT below.
declare @date datetime = getutcdate()
-- Fixed key values used to match rows in sync_publishers.
declare @RegisteredObjectTypeID int = 2
declare @SyncPublisherSourceID int = 1
-- Step 1: refresh master_update_date on tracking rows that already exist.
update pub
set pub.master_update_date = @date
from #AffectedIDs affected
inner join sync_publishers pub on
pub.sync_registered_object_type_id = @RegisteredObjectTypeID
and pub.sync_publisher_source_id = @SyncPublisherSourceID
and pub.sync_object_id = affected.advisor_id
-- Step 2: insert tracking rows for ids that have none yet
-- (anti-join: LEFT JOIN plus IS NULL filter).
insert into sync_publishers (sync_object_id, sync_registered_object_type_id, sync_publisher_source_id , master_update_date)
select
affected.advisor_id,
@RegisteredObjectTypeID,
@SyncPublisherSourceID,
@date
from #AffectedIDs affected
left join sync_publishers pub on
pub.sync_registered_object_type_id = @RegisteredObjectTypeID
and pub.sync_publisher_source_id = @SyncPublisherSourceID
and pub.sync_object_id = affected.advisor_id
where
pub.sync_object_id is null
drop table #AffectedIDs
END
END
这是发生死锁的新触发器。
-- Refactored trigger on dbo.xyz.
-- Collects the affected ids into a dtInt table variable and hands them to
-- SyncTracker_PublishEvent, which performs the sync_publishers upsert.
CREATE TRIGGER [dbo].[TR__xyz__update_sync_publishers]
ON [dbo].[xyz]
AFTER INSERT,DELETE,UPDATE
AS
BEGIN
SET NOCOUNT ON;
-- There is no TRIGGER_NESTLEVEL() guard here; that check happens inside the
-- procedure, so @ids is populated even for nested invocations.
declare @ids dtInt
-- NOTE(review): both UNION branches are identical and read only `inserted`;
-- the second was presumably meant to read `deleted` - confirm.
-- NOTE(review): the unqualified `id` exists in both `inserted` and `xyz`;
-- presumably an artifact of anonymizing the listing - confirm.
insert into @ids
select distinct t.id
from
(
select id from inserted
INNER JOIN xyz a ON a.id = id
WHERE [text] <> ''
union
select id from inserted
INNER JOIN xyz a ON a.id = id
WHERE [text] <> ''
) t
-- First argument 2 is passed as @objectTypeId; @ids is passed as a
-- table-valued parameter.
exec SyncTracker_PublishEvent 2, @ids
END
以下是提取SP的定义:
-- Upserts one sync_publishers tracking row per id in @ids.
--
-- Parameters:
--   @objectTypeId  value matched against / written to
--                  sync_publishers.sync_registered_object_type_id
--   @ids           read-only table-valued parameter of type dtInt; its [value]
--                  column is matched against sync_publishers.sync_object_id
--
-- Existing rows get master_update_date set to the current UTC time; ids with
-- no row yet are inserted with that same timestamp.
CREATE PROCEDURE [dbo].[SyncTracker_PublishEvent]
@objectTypeId int,
@ids dtInt readonly
AS
BEGIN
SET NOCOUNT ON;
-- Do nothing when more than one trigger is on the call stack (nested trigger).
if(TRIGGER_NESTLEVEL() > 1) RETURN;
-- Fixed publisher source id used in every match and insert below.
declare @pubSourceId int = 1
-- One timestamp shared by the UPDATE and the INSERT.
declare @date datetime = getutcdate()
-- Step 1: refresh master_update_date on tracking rows that already exist.
-- NOTE(review): @ids is a table variable, which carries no column statistics,
-- unlike the #AffectedIDs temp table in the pre-refactoring trigger; the plan
-- chosen here may therefore differ - compare the actual plans to confirm.
update pub
set pub.master_update_date = @date
from @ids affected
inner join sync_publishers pub
on pub.sync_registered_object_type_id = @objectTypeId
and pub.sync_publisher_source_id = @pubSourceId
and pub.sync_object_id = affected.value
-- Step 2: insert tracking rows for ids that have none yet
-- (anti-join: LEFT JOIN plus IS NULL filter).
-- NOTE(review): UPDATE and INSERT are separate statements with no locking
-- hints, so a concurrent caller could presumably insert the same key between
-- them - confirm whether a unique constraint on sync_publishers covers this.
insert into sync_publishers (sync_object_id, sync_registered_object_type_id, sync_publisher_source_id , master_update_date)
select affected.value, @objectTypeId, @pubSourceId, @date
from @ids affected
left join sync_publishers pub
on pub.sync_registered_object_type_id = @objectTypeId
and pub.sync_publisher_source_id = @pubSourceId
and pub.sync_object_id = affected.value
where
pub.sync_object_id is null
END
GO
dtInt的定义。
-- Table type holding a set of unique, non-null int values.
-- The clustered primary key on [value] enforces uniqueness.
CREATE TYPE [dbo].[dtInt] AS TABLE
(
    [value] int NOT NULL PRIMARY KEY CLUSTERED
)
最后是死锁图。
<deadlock>
<victim-list>
<victimProcess id="processe1892fe8c8" />
</victim-list>
<process-list>
<process id="processe1892fe8c8" taskpriority="0" logused="3824" waitresource="KEY: 5:72057602924150784 (4776e78e2961)" waittime="5686" ownerId="2583257965" transactionname="user_transaction" lasttranstarted="2016-10-03T08:30:42.500" XDES="0xe192b24408" lockMode="U" schedulerid="6" kpid="41296" status="suspended" spid="141" sbid="0" ecid="0" priority="0" trancount="2" lastbatchstarted="2016-10-03T08:30:42.503" lastbatchcompleted="2016-10-03T08:30:42.493" lastattention="2016-10-03T08:29:01.693" clientapp="..." hostname="..." hostpid="22572" loginname="kbuser" isolationlevel="read committed (2)" xactid="2583257965" currentdb="5" lockTimeout="4294967295" clientoption1="673316896" clientoption2="128056">
<executionStack>
<frame procname="63c1b4d8-1c55-4429-b057-81fb6da8f780.dbo.SyncTracker_PublishEvent" line="21" stmtstart="1178" stmtend="1680" sqlhandle="0x030005007bf23c4b5012b40092a6000001000000000000000000000000000000000000000000000000000000">
update pub
set pub.master_update_date = @date
from @ids affected
inner join sync_publishers pub
on pub.sync_registered_object_type_id = @objectTypeId
and pub.sync_publisher_source_id = @pubSourceId
and pub.sync_object_id = affected.valu </frame>
<frame procname="63c1b4d8-1c55-4429-b057-81fb6da8f780.dbo.TR__xyz__update_sync_publishers" line="28" stmtstart="1300" stmtend="1372" sqlhandle="0x03000500f711233ddee4c60090a6000000000000000000000000000000000000000000000000000000000000">
exec SyncTracker_PublishEvent 2, @id </frame>
<frame procname="unknown" line="1" stmtstart="1054" stmtend="3032" sqlhandle="0x02000000912653235c5ef3529289f19ae4445e62ee1ccbc00000000000000000000000000000000000000000">
unknown </frame>
<frame procname="unknown" line="1" sqlhandle="0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000">
unknown </frame>
</executionStack>
</process>
<process id="processdfa401b848" taskpriority="0" logused="9384" waitresource="KEY: 5:72057602924150784 (1501093f83b4)" waittime="5814" ownerId="2582414029" transactionname="user_transaction" lasttranstarted="2016-10-03T08:30:09.933" XDES="0x104486ac408" lockMode="U" schedulerid="1" kpid="19548" status="suspended" spid="213" sbid="0" ecid="0" priority="0" trancount="2" lastbatchstarted="2016-10-03T08:30:53.047" lastbatchcompleted="2016-10-03T08:30:53.047" lastattention="1900-01-01T00:00:00.047" clientapp="..." hostname="..." hostpid="6196" loginname="kbuser" isolationlevel="read committed (2)" xactid="2582414029" currentdb="5" lockTimeout="4294967295" clientoption1="673316896" clientoption2="128056">
<executionStack>
<frame procname="63c1b4d8-1c55-4429-b057-81fb6da8f780.dbo.SyncTracker_PublishEvent" line="21" stmtstart="1178" stmtend="1680" sqlhandle="0x030005007bf23c4b5012b40092a6000001000000000000000000000000000000000000000000000000000000">
update pub
set pub.master_update_date = @date
from @ids affected
inner join sync_publishers pub
on pub.sync_registered_object_type_id = @objectTypeId
and pub.sync_publisher_source_id = @pubSourceId
and pub.sync_object_id = affected.valu </frame>
<frame procname="63c1b4d8-1c55-4429-b057-81fb6da8f780.dbo.TR__xyz__update_sync_publishers" line="28" stmtstart="1300" stmtend="1372" sqlhandle="0x03000500f711233ddee4c60090a6000000000000000000000000000000000000000000000000000000000000">
exec SyncTracker_PublishEvent 2, @id </frame>
<frame procname="unknown" line="1" stmtstart="1120" stmtend="3132" sqlhandle="0x020000007414d821ed68a2ab4462b4eca6b2fdb4ba28cc350000000000000000000000000000000000000000">
unknown </frame>
<frame procname="unknown" line="1" sqlhandle="0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000">
unknown </frame>
</executionStack>
</process>
</process-list>
<resource-list>
<keylock hobtid="72057602924150784" dbid="5" objectname="63c1b4d8-1c55-4429-b057-81fb6da8f780.dbo.sync_publishers" indexname="IX__sync_publishers__registered_object_type_id__sync_object_id" id="lock10887a96b00" mode="X" associatedObjectId="72057602924150784">
<owner-list>
<owner id="processdfa401b848" mode="X" />
</owner-list>
<waiter-list>
<waiter id="processe1892fe8c8" mode="U" requestType="wait" />
</waiter-list>
</keylock>
<keylock hobtid="72057602924150784" dbid="5" objectname="63c1b4d8-1c55-4429-b057-81fb6da8f780.dbo.sync_publishers" indexname="IX__sync_publishers__registered_object_type_id__sync_object_id" id="lockdb7d7b8200" mode="X" associatedObjectId="72057602924150784">
<owner-list>
<owner id="processe1892fe8c8" mode="X" />
</owner-list>
<waiter-list>
<waiter id="processdfa401b848" mode="U" requestType="wait" />
</waiter-list>
</keylock>
</resource-list>
</deadlock>
此处提供了sync_publishers的定义:http://pastebin.com/LviwwCDi。
如果您对可行原因有任何想法 - 欢迎分享 - 我们将非常感谢!
更新1. UPDATE / INSERT到sync_publishers的实际执行计划
实际执行计划看起来非常相似。
更新2.尝试了一些建议
我今天尝试了一些建议:
通过删除sync_publisher_source_id列(在我们的实现中它并不是必需的),消除了查询计划中因非聚集索引缺少该列而产生的“键查找”(Key Lookup)。
将UPDATE + INSERT重写为单个MERGE语句。
-- Single-statement replacement for the UPDATE + INSERT pair.
-- The target is sync_publishers2, matched on sync_object_id and
-- sync_registered_object_type_id only; sync_publisher_source_id is not used.
-- NOTE(review): @ids, @objectTypeId and @date are not declared in this
-- fragment; presumably they come from SyncTracker_PublishEvent - confirm.
MERGE sync_publishers2 t
USING @ids s
ON s.[value] = t.sync_object_id
and t.sync_registered_object_type_id = @objectTypeId
-- Row already tracked: refresh its timestamp.
WHEN MATCHED
THEN UPDATE
SET master_update_date = @date
-- No row in the target for this id: create it.
WHEN NOT MATCHED
THEN INSERT
(sync_object_id, sync_registered_object_type_id, master_update_date)
VALUES
(s.[value], @objectTypeId, @date);
结果开始在MERGE语句上遇到死锁。可以在此处查看新的死锁图:http://pastebin.com/QNJk7tea。
更新3.尝试MERGE提示
我尝试对MERGE使用xlock和holdlock提示,但没有成功,MERGE上再次发生死锁。
MERGE sync_publishers2 with(xlock, holdlock) t