我有一个需要优化的查询,但不确定该如何优化。这是 SQL 脚本:
-- Stage the colour combinations from @colorCombination that no 'furniture'
-- product already has. @colorCombination is declared outside this snippet.
declare @tempTable table (
    color1   int,
    color2   int,
    color3   int,
    color4   int,
    newToken uniqueidentifier default newid(),
    ordinal  int identity(1,1)   -- T-SQL keyword is IDENTITY, not "identifier"
);

-- Explicit column list: only the four colours are supplied; newToken takes its
-- default and ordinal is generated by the identity property.
insert into @tempTable (color1, color2, color3, color4)
select color1, color2, color3, color4
from @colorCombination
except  -- set difference; also removes duplicate rows and matches NULL to NULL
select c1.color as color1, c2.color as color2, c3.color as color3, c4.color as color4
from products p
inner join attributes c1 on c1.pId = p.Id and c1.type = 'primary'
inner join attributes c2 on c2.pId = p.Id and c2.type = 'secondary'
inner join attributes c3 on c3.pId = p.Id and c3.type = 'other1'
inner join attributes c4 on c4.pId = p.Id and c4.type = 'other2'
where p.category = 'furniture';
去掉 except 时,两个 select 语句各自都运行得非常快,但加上 except 之后,整个查询花了将近 15 分钟。
@colorCombination有近24,000行
productattributes返回11,000行
我使用 except,是因为需要找出数据库中尚不存在的那约 13,000 条记录,然后将它们插入。
这是在sql server 2008上运行的
是否有比使用 except 更好的方法来获取这些缺失的记录?
答案 0 :(得分:2)
行数这么少却性能如此糟糕,原因在于表变量及其对执行计划的影响(表变量没有统计信息,优化器通常会把它估计为只有 1 行,从而选出不合适的计划)。
此答案改用在 (color1, color2, color3, color4) 上建有非聚簇索引的临时表。您的原始表变量没有为任何颜色列指定 not null,因此我假设某些 null 值是可以接受的。如果不是这样,则可以去掉代理键 Id,并直接在四个颜色列上使用聚簇索引。
为了允许 null 值,这里也改回使用 except 而不是 not exists()。如果不需要考虑 null,那么 not exists() 应该会更快一些。
有关表变量和临时表的参考:
<hr/>
/* step 1: existing colors in a temp table with an index */
create table #productColors (
    id int not null identity (1,1) primary key clustered
  , color1 int , color2 int , color3 int , color4 int
);

-- Pivot each furniture product's attribute rows into a single
-- (color1..color4) row. Every CASE is wrapped in max() because the query
-- groups by product; a bare CASE is not valid alongside GROUP BY.
-- NOTE(review): a product lacking one of the four types gets NULL in that slot
-- here, whereas the four-inner-join form drops such a product entirely; and if
-- a product has several attributes of one type, max() keeps only the largest
-- color. Confirm against the data.
insert into #productColors (color1, color2, color3, color4)
select distinct
    color1 = max(case when c.[type] = 'primary'   then c.color end)
  , color2 = max(case when c.[type] = 'secondary' then c.color end)
  , color3 = max(case when c.[type] = 'other1'    then c.color end)
  , color4 = max(case when c.[type] = 'other2'    then c.color end)
from products p
  inner join attributes c
    on c.pId = p.Id              -- attributes reference their product via pId
where p.category = 'furniture'   -- same filter as the original query
  and c.[type] in ('primary', 'secondary', 'other1', 'other2')
group by p.Id;

-- Index on the compared columns, created after the load.
create nonclustered index ix_productColors
  on #productColors (color1, color2, color3, color4);
/* step 2: color combinations in a temp table with an index */
create table #colorCombinations (
    id int not null identity (1,1) primary key clustered
  , color1 int , color2 int , color3 int , color4 int
);

-- Copy the candidate combinations out of the table variable, de-duplicated.
-- The variable is named @colorCombination (singular), and it must be declared
-- in the same batch as this statement.
insert into #colorCombinations (color1, color2, color3, color4)
select distinct color1 , color2 , color3 , color4
from @colorCombination;

-- Index on the compared columns, created after the load.
create nonclustered index ix_colorCombinations
  on #colorCombinations (color1, color2, color3, color4);
/* step 3: insert new color combinations into #tempTable */
create table #tempTable (color1 int
  , color2 int
  , color3 int
  , color4 int
  , newToken uniqueidentifier default newid()
  , ordinal int identity(1,1)   -- T-SQL keyword is IDENTITY, not "identifier"
  );

-- Candidate combinations minus those already present on products.
-- EXCEPT treats two NULLs as equal, so nullable color columns compare correctly.
insert into #tempTable (color1, color2, color3, color4)
select color1, color2, color3, color4
from #colorCombinations         -- plural, matching the table created in step 2
except
select color1, color2, color3, color4
from #productColors;
<hr/> 旧答案:
使用 except 还会对插入 @tempTable 的行进行去重(根据您的使用方式,用临时表 #temptable 代替表变量 @TableVariable 可能会更好)。
如果您不需要对 @tempTable 中的行进行去重,那么可以使用 not exists()(或者在 select 中添加 distinct,但这会带来性能损失):
-- Insert every candidate combination that no 'furniture' product already has.
-- Unlike EXCEPT, NOT EXISTS keeps duplicate rows from @colorCombination, and a
-- NULL color never matches because the comparisons below use "=".
-- Explicit column list: newToken takes its default and ordinal is generated.
insert into @tempTable (color1, color2, color3, color4)
select cc.color1, cc.color2, cc.color3, cc.color4
from @colorCombination cc
where not exists (
    select 1
    from products p
    inner join attributes c1 on c1.pId = p.Id and c1.type = 'primary'
    inner join attributes c2 on c2.pId = p.Id and c2.type = 'secondary'
    inner join attributes c3 on c3.pId = p.Id and c3.type = 'other1'
    inner join attributes c4 on c4.pId = p.Id and c4.type = 'other2'
    where p.category = 'furniture'
      and c1.color = cc.color1
      and c2.color = cc.color2
      and c3.color = cc.color3
      and c4.color = cc.color4
);