我编写了这个T-SQL脚本,用于汇总由重建过程创建的数据库中的重复行。为此,它执行以下操作:
由于涉及的数据集的大小,此脚本需要一段不合适的时间才能运行。
任何人都可以看到如何将其转换为基于设置的,如果是这样,你能提供一个例子吗?
如果我的描述有点令人困惑,我提前道歉......
declare @MinColTargetPKTable table
(ColIntA int,
ColIntB int,
ColDateTimeA nvarchar(25),
minColTargetPK int
)
insert @minColTargetPKtable
select ColIntA, ColIntB, convert(nvarchar(25),ColDateTimeA,120) as ColDateTimeA,
min(ColTargetPK) as MinColTargetPK from TargetColTable
group by ColIntA, ColIntB, convert(nvarchar(25),ColDateTimeA,120)
declare @TargetColVarchar varchar(max)
declare @updatedColTargetPKs table
(updatedColTargetPKs int)
declare @minColTargetPK int
declare cur cursor
for
select minColTargetPK
from @minColTargetPKtable
open cur
fetch next from cur into @minColTargetPK
while @@FETCH_STATUS = 0
begin
begin try
set @TargetColVarchar =
convert(nvarchar(max),(
select replace(convert(nvarchar(max), isnull(TargetColVarchar,'')) + convert (nvarchar(max),' \par \par \par'), '\par } ', '\par') as
TargetColVarchar
from TargetColTable v1
where ColIntA = (select ColIntA from TargetColTable where ColTargetPK = @minColTargetPK)
and ColIntB = (select ColIntB from TargetColTable where ColTargetPK = @minColTargetPK)
and convert(nvarchar(25),ColDateTimeA,120) = (select convert(nvarchar(25),ColDateTimeA,120) from TargetColTable where ColTargetPK = @minColTargetPK)
order by ColTargetPK
for xml path(''), type
))
set @TargetColVarchar = REPLACE(REPLACE (REPLACE (@TargetColVarchar,'<TargetColVarchar>',''),'</TargetColVarchar>',''), '
','')
update TargetColTable
set TargetColVarchar = @TargetColVarchar
where ColTargetPK = @minColTargetPK
update TargetColTable
set ColTargetStatus = 0
from TargetColTable v1
where ColIntA = (select ColIntA from TargetColTable where ColTargetPK = @minColTargetPK)
and ColIntB = (select ColIntB from TargetColTable where ColTargetPK = @minColTargetPK)
and convert(nvarchar(25),ColDateTimeA,120) = (select convert(nvarchar(25),ColDateTimeA,120) from TargetColTable where ColTargetPK = @minColTargetPK)
and ColTargetPK != @minColTargetPK
Print 'Merge complete for ColTargetPK '+ convert(varchar(50), @minColTargetPK)
end try
begin catch
Print 'Merge failed for ColTargetPK '+ convert (varchar(20),@minColTargetPK)
end catch
fetch next from cur into @minColTargetPK
end
close cur
deallocate cur
编辑:好的,下面是使用Preet的建议将脚本移动到基于集合的操作。为了给出一些额外的背景,TargetTable约为110万行。奇怪的是,在2次试验中,下面基于集合的脚本并不比下面基于光标的脚本在相同的数据子集(大约20000行)上快得多。对于为什么这不会更快的任何想法?
declare @minColTargetPKTable table
(
ColIntA int,
ColIntB int,
ColDateTimeA nvarchar(25),
ColTargetPK int,
concTargetCol varchar(max)
)
insert @minColTargetPKtable (minColIntA,ColIntB,minColDateTimeA,minColTargetPK)
select ColIntA, ColIntB, convert(nvarchar(25),ColDateTimeA,120) as ColDateTimeA, min(ColTargetPK) as minColTargetPK from TargetTable
group by ColIntA, ColIntB, convert(nvarchar(25),ColDateTimeA,120)
update @minColTargetPKTable
set concTargetCol =
(REPLACE(REPLACE(REPLACE(replace(convert(nvarchar(max),
(
select convert(nvarchar(max), isnull(TargetColVarchar,'')) + convert (nvarchar(max),' \par \par \par ') as
TargetColVarchar
from TargetTable v1
where ColIntA = (select ColIntA from TargetTable where ColTargetPK = minColTargetPK)
and ColIntB = (select ColIntB from TargetTable where ColTargetPK = minColTargetPK)
and convert(nvarchar(25),ColDateTimeA,120) = (select convert(nvarchar(25),ColDateTimeA,120) from TargetTable where ColTargetPK = minColTargetPK)
order by ColTargetPK
for xml path(''), type
))
, '\par } ', '\par '),'<TargetColVarchar>',''),'</TargetColVarchar>',''), '
',''))
update TargetTable
set TargetColVarchar = mv.concTargetCol
from @minColTargetPKTable mv
where mv.minColTargetPK = TargetTable.ColTargetPK
update TargetTable
set TargetColStatus = 0
from TargetTable v
inner join @minColTargetPKTable mv on
mv.minColIntA = v.ColIntA
and mv.minColDateTimeA = convert(nvarchar(25),v.ColDateTimeA,120)
and mv.ColIntB = v.ColIntB
where ColTargetPK not in (select minColTargetPK from @minColTargetPKTable)
答案 0 :(得分:1)
好的,我建议以下内容:
然后,您可以根据执行计划进行优化
更新
查看修改后的结果,我会说顺序如下:
使用#temp表,这些表在大型数据集上更具竞争力。
一个。在临时表中添加更多列,以便在大前锋中记录诸如(select ColIntA from TargetColTable where ColTargetPK = @minColTargetPK
和(select ColIntB from TargetColTable where ColTargetPK = @minColTargetPK
之类的内容
湾我认为字符串替换很慢。这仍然很慢。我知道XML不是世界上最快的东西。你能用SQL XML specfic代码替换字符串comp
℃。在底部的第二次更新中,where ColTargetPK not in (select minColTargetPK from @minColTargetPKTable)
可能比精确连接慢,并且您应该同时执行两次更新
但是,请使用“实际查询计划”来解决此问题。
答案 1 :(得分:0)
尝试对您的第一个查询执行此操作
from tct v1
join tct v2 on v2.pk = @pk
where v1.a = v2.a and v1.b = v2.b and v1.dt = v2.dt
这是你的第二个问题
from tct v1
join tct v2 on v2.pk = @pk and v1.pk <> @pk
where v1.a = v2.a and v1.b = v2.b and v1.dt = v2.dt