我想快速地向表中插入数据,同时避免产生重复记录。为了便于讨论,我们把这张表称为MarketPrices。我一直在尝试下面两种方法,但不确定该如何通过基准测试比较哪一种更快。
-- Approach 1: insert the candidate row unless EXCEPT finds an identical row.
-- EXCEPT compares all four columns, so every stored row is expanded with both
-- flag values (0 and 1); a 0/1 @IsMarketOpen therefore always finds a matching
-- fourth column and only SecurityCode/BuyPrice/SellPrice decide the outcome.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen
    EXCEPT
    SELECT mp.SecurityCode, mp.BuyPrice, mp.SellPrice, flags.bool
    FROM MarketPrices AS mp
        CROSS JOIN (SELECT 0 AS bool UNION SELECT 1 AS bool) AS flags
OR
-- Approach 2: look up an existing row first and insert only when none is found.
DECLARE @MktId int

-- Scalar subquery: leaves @MktId NULL when no row matches the candidate values.
SET @MktId = (
    SELECT SecurityId
    FROM MarketPrices
    WHERE SecurityCode = @SecurityCode
      AND BuyPrice = @BuyPrice
      AND SellPrice = @SellPrice
)

IF @MktId IS NULL
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)
假设所有@whatever形式的变量都是存储过程中的输入参数。
我希望在BuyPrice或SellPrice(或两者)与该SecurityCode之前的所有记录都不同时,为其插入一条新记录。我不关心IsMarketOpen。
对于上述任何一种方法,有什么明显的愚蠢之处吗?一个比另一个快吗?
答案 0 :(得分:11)
编辑:要防止并发环境中的竞争条件(race conditions),请在相关子查询或被EXCEPT排除的SELECT中使用WITH (UPDLOCK)。我在下面编写的测试脚本不需要它,因为它使用仅对当前连接可见的临时表;但在真实环境中对用户表进行操作时,它是必要的。
MERGE不需要UPDLOCK。
受mcl的答案(建立唯一索引,让数据库抛出错误)启发,我决定将条件插入(conditional inserts)与try/catch进行对比。
结果似乎表明条件插入优于try/catch,但实际情况可能因环境而异(YMMV)。这是一个非常简单的场景(单列、小表等),并且只在一台机器上执行。
以下是结果(SQL Server 2008,版本10.0.1600.2):
duplicates (short table)
try/catch: 14440 milliseconds / 100000 inserts
conditional insert: 2983 milliseconds / 100000 inserts
except: 2966 milliseconds / 100000 inserts
merge: 2983 milliseconds / 100000 inserts
uniques
try/catch: 3920 milliseconds / 100000 inserts
conditional insert: 3860 milliseconds / 100000 inserts
except: 3873 milliseconds / 100000 inserts
merge: 3890 milliseconds / 100000 inserts
straight insert: 3173 milliseconds / 100000 inserts
duplicates (tall table)
try/catch: 14436 milliseconds / 100000 inserts
conditional insert: 3063 milliseconds / 100000 inserts
except: 3063 milliseconds / 100000 inserts
merge: 3030 milliseconds / 100000 inserts
请注意,即使是唯一值插入,try/catch的开销也略高于条件插入。我想知道这是否因版本、CPU、内核数量等而异。
我没有对IF形式的条件插入进行基准测试,只测试了WHERE形式。我推测IF形式的开销会更大,因为a)你需要两条语句,并且b)你需要将这两条语句包装在一个事务中,并将隔离级别设置为serializable(!)。如果有人想测试它,则需要将临时表改为常规用户表(serializable不适用于本地临时表)。
这是脚本:
-- Benchmark harness. Tested on SQL 2008; to run on SQL 2005, comment out
-- the batches that use MERGE.
set nocount on;

-- Start from a fresh single-column work table. The primary key on col1 is
-- what makes a repeated value fail (or get filtered out) in the tests below.
if object_id('tempdb..#temp') is not null
begin
    drop table #temp;
end

create table #temp
(
    col1 int primary key
);
go

-------------------------------------------------------
-- duplicate insert test against a table w/ 1 record
-------------------------------------------------------
-- Seed the single row that every "short table" test collides with.
insert into #temp (col1) values (1);
go
-- Benchmark: attempt the same insert (value 1) 100000 times against the seeded
-- table; any error raised by the insert is discarded by the empty catch block.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    begin try
        insert into #temp (col1) select @val;
    end try
    begin catch
        -- intentionally empty: swallowing the failure is part of what is timed
    end catch
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (short table), try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-- Benchmark: 100000 conditional inserts of value 1. The NOT EXISTS guard
-- yields no row to insert whenever col1 = 1 is already present.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    insert into #temp (col1)
    select @val
    where not exists (select * from #temp as existing where existing.col1 = @val);
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (short table), conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-- Benchmark: 100000 inserts of value 1 where EXCEPT removes the candidate
-- whenever the same value is already stored in #temp.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    insert into #temp (col1)
    select @val
    except
    select col1 from #temp;
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (short table), except: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-- comment this batch out for SQL 2005
-- Benchmark: 100000 MERGE statements for value 1; the insert branch only
-- fires when the target has no row with the same col1.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    merge into #temp as t
    using (select @val) as s (col1)
        on t.col1 = s.col1
    when not matched by target then
        insert (col1) values (s.col1);
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (short table), merge: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-------------------------------------------------------
-- unique insert test against an initially empty table
-------------------------------------------------------
-- Baseline: plain inserts of the values 1..100000 with no duplicate check.
truncate table #temp;

declare @val int, @t0 datetime, @elapsed int;
set @val = 0;
set @t0 = getdate();

while @val < 100000
begin
    set @val = @val + 1;
    insert into #temp (col1) select @val;
end

-- @val doubles as the insert counter in the report.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('uniques, straight insert: %i milliseconds / %i inserts', -1, -1, @elapsed, @val) with nowait;
go
-- Benchmark: insert the values 1..100000 into an emptied table, each insert
-- wrapped in try/catch with an empty catch block.
truncate table #temp;

declare @val int, @t0 datetime, @elapsed int;
set @val = 0;
set @t0 = getdate();

while @val < 100000
begin
    set @val = @val + 1;
    begin try
        insert into #temp (col1) select @val;
    end try
    begin catch
        -- intentionally empty: any insert error is discarded
    end catch
end

-- @val doubles as the insert counter in the report.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('uniques, try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed, @val) with nowait;
go
-- Benchmark: insert the values 1..100000 into an emptied table, each guarded
-- by a NOT EXISTS check on col1.
truncate table #temp;

declare @val int, @t0 datetime, @elapsed int;
set @val = 0;
set @t0 = getdate();

while @val < 100000
begin
    set @val = @val + 1;
    insert into #temp (col1)
    select @val
    where not exists (select * from #temp as existing where existing.col1 = @val);
end

-- @val doubles as the insert counter in the report.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('uniques, conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed, @val) with nowait;
go
-- Benchmark: insert the values 1..100000 into an emptied table, using EXCEPT
-- against the stored values as the duplicate filter.
truncate table #temp;

declare @val int, @t0 datetime, @elapsed int;
set @val = 0;
set @t0 = getdate();

while @val < 100000
begin
    set @val = @val + 1;
    insert into #temp (col1)
    select @val
    except
    select col1 from #temp;
end

-- @val doubles as the insert counter in the report.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('uniques, except: %i milliseconds / %i inserts', -1, -1, @elapsed, @val) with nowait;
go
-- comment this batch out for SQL 2005
-- Benchmark: insert 100000 distinct values through MERGE into an emptied table.
truncate table #temp
declare @x int, @now datetime, @duration int
-- Start at 0, like the other "uniques" batches, so the loop runs 100000 times
-- and inserts 1..100000. Starting at 1 performed only 99999 merges while still
-- reporting 100000, and left value 1 out of the table that the following
-- "tall table" duplicate tests run against.
select @x = 0, @now = getdate()
while @x < 100000 begin
    set @x = @x+1
    -- The insert branch fires only when no target row has the same col1.
    merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
end
-- @x doubles as the insert counter in the report.
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, merge: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
go
-------------------------------------------------------
-- duplicate insert test against a table w/ 100000 records
-------------------------------------------------------
-- Benchmark: attempt to insert value 1 100000 times into #temp as the
-- preceding uniques batch left it (no truncate here); any error raised by the
-- insert is discarded by the empty catch block.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    begin try
        insert into #temp (col1) select @val;
    end try
    begin catch
        -- intentionally empty: swallowing the failure is part of what is timed
    end catch
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (tall table), try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-- Benchmark: 100000 conditional inserts of value 1 into the already-populated
-- table; the NOT EXISTS guard yields no row when col1 = 1 is present.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    insert into #temp (col1)
    select @val
    where not exists (select * from #temp as existing where existing.col1 = @val);
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (tall table), conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-- Benchmark: 100000 inserts of value 1 into the already-populated table, with
-- EXCEPT against the stored values acting as the duplicate filter.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    insert into #temp (col1)
    select @val
    except
    select col1 from #temp;
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (tall table), except: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-- comment this batch out for SQL 2005
-- Benchmark: 100000 MERGE statements for value 1 against the already-populated
-- table; the insert branch only fires when no target row has the same col1.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    merge into #temp as t
    using (select @val) as s (col1)
        on t.col1 = s.col1
    when not matched by target then
        insert (col1) values (s.col1);
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (tall table), merge: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
答案 1 :(得分:6)
编辑:要在并发环境中防止竞争条件(race conditions),请在相关子查询中使用WITH (UPDLOCK)。
我认为这将是标准方法:
-- Insert the candidate row only when no row with the same
-- (SecurityCode, BuyPrice, SellPrice) exists yet.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen
WHERE NOT EXISTS (
    -- UPDLOCK by itself only locks rows that are actually found; when nothing
    -- matches there is nothing to lock, and two concurrent sessions can both
    -- pass this check and insert the same values. HOLDLOCK (serializable
    -- semantics for this table reference) also protects the empty range until
    -- the statement's transaction ends, closing that gap.
    -- NOTE(review): how narrow the range lock is depends on an index covering
    -- these three columns - confirm one exists.
    SELECT * FROM MarketPrices WITH (UPDLOCK, HOLDLOCK)
    WHERE SecurityCode = @SecurityCode
    AND BuyPrice = @BuyPrice
    AND SellPrice = @SellPrice
)
如果您的任何字段可以为空,则必须将其添加到条件中。
你的第一种方法很有意思,但EXCEPT的要求让你不得不绕很多弯路。这种方法本质上相同,但它避开了列匹配问题。
或者:
-- Variant using EXCEPT: the derived table yields the candidate key only when
-- no stored row has the same (SecurityCode, BuyPrice, SellPrice); EXCEPT treats
-- NULLs as equal, so nullable columns need no extra handling.
-- @IsMarketOpen is added outside the EXCEPT so it takes no part in the comparison.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT SecurityCode, BuyPrice, SellPrice, @IsMarketOpen
FROM (
    SELECT @SecurityCode, @BuyPrice, @SellPrice
    EXCEPT
    -- UPDLOCK alone cannot lock a row that does not exist yet, so two sessions
    -- could both find "no match" and insert. HOLDLOCK keeps range protection
    -- until the transaction ends, which is what actually prevents the race.
    SELECT SecurityCode, BuyPrice, SellPrice FROM MarketPrices WITH (UPDLOCK, HOLDLOCK)
) a (SecurityCode, BuyPrice, SellPrice)
在这个例子中,EXCEPT的好处是它可以处理NULL而无需您进行任何额外的编码。要在第一个示例中实现相同的功能,您需要以繁琐的方式(longhand)对每一对值分别测试NULL和相等性。
你的第二种方法没问题,但你不需要变量。参见Tomalak的解决方案,他把它整理得很干净。此外,如果并发插入是需要考虑的问题,你需要显式地处理这种可能性。
答案 2 :(得分:3)
我任何时候都会选择语义清晰的解决方案。你的两个方案在我看来都相当晦涩(虽然后者比前者好)。
-- Insert the candidate row only if no row with the same security code and
-- prices is present; IsMarketOpen takes no part in the check.
IF NOT EXISTS (
        SELECT 1
        FROM MarketPrices AS mp
        WHERE mp.SecurityCode = @SecurityCode
          AND mp.BuyPrice = @BuyPrice
          AND mp.SellPrice = @SellPrice
    )
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)
如果在SecurityCode, BuyPrice, SellPrice上建有组合索引,则EXISTS查询应该会相当快。
我认为,基准测试无非就是对一个WHILE循环计时。自己测试一下,亲眼看看。
答案 3 :(得分:2)
另一种选择:在相关字段(SecurityCode,BuyPrice,SellPrice)上创建唯一索引,发出简单插入,并让数据库决定记录是否重复。插入将在尝试插入副本时失败。
使用代码(无论是外部语言还是SQL proc)来保证唯一性并不严格,最终会导致您希望阻止的重复项。
答案 4 :(得分:1)
下面我将Only inserting a row if it's not already there的最佳答案添加到Peter Radocchia的优秀答案中。
需要注意的是,在没有实际冲突时(即你预计冲突非常罕见,对应uniques场景),race safe with try/catch技术比race safe with updlock, holdlock技术略快(约1%);而在总是发生冲突时(对应duplicates场景),它会慢一些(约20%)。这里没有考虑锁升级等复杂问题。
以下是结果(SQL Server 2014,版本12.0.2000.8):
duplicates (short table)
try/catch: 15546 milliseconds / 100000 inserts
conditional insert: 1460 milliseconds / 100000 inserts
except: 1490 milliseconds / 100000 inserts
merge: 1420 milliseconds / 100000 inserts
race safe with try/catch: 1650 milliseconds / 100000 inserts
race safe with updlock, holdlock: 1330 milliseconds / 100000 inserts
uniques
try/catch: 2266 milliseconds / 100000 inserts
conditional insert: 2156 milliseconds / 100000 inserts
except: 2273 milliseconds / 100000 inserts
merge: 2136 milliseconds / 100000 inserts
race safe with try/catch: 2400 milliseconds / 100000 inserts
race safe with updlock, holdlock: 2430 milliseconds / 100000 inserts
straight insert: 1686 milliseconds / 100000 inserts
duplicates (tall table)
try/catch: 15826 milliseconds / 100000 inserts
conditional insert: 1530 milliseconds / 100000 inserts
except: 1506 milliseconds / 100000 inserts
merge: 1443 milliseconds / 100000 inserts
race safe with try/catch: 1636 milliseconds / 100000 inserts
race safe with updlock, holdlock: 1426 milliseconds / 100000 inserts
重复(短表)部分:
-- Benchmark: 100000 guarded inserts of value 1, with try/catch as a backstop.
-- The NOT EXISTS check filters the duplicate; the catch block ignores error
-- 2627 and re-raises anything else.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    begin try
        insert into #temp (col1)
        select @val
        where not exists (select * from #temp as existing where existing.col1 = @val);
    end try
    begin catch
        -- only error 2627 is tolerated; every other error propagates
        if error_number() <> 2627
            throw;
    end catch
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (short table), race safe with try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-- Benchmark: 100000 guarded inserts of value 1 where the existence check reads
-- #temp with the updlock and holdlock table hints.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    insert into #temp (col1)
    select @val
    where not exists (
        select *
        from #temp as existing with (updlock, holdlock)
        where existing.col1 = @val
    );
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (short table), race safe with updlock, holdlock: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
唯一值(uniques)部分:
-- Benchmark: insert the values 1..100000 into an emptied table, each guarded
-- by NOT EXISTS and wrapped in try/catch that ignores only error 2627.
truncate table #temp;

declare @val int, @t0 datetime, @elapsed int;
set @val = 0;
set @t0 = getdate();

while @val < 100000
begin
    set @val = @val + 1;
    begin try
        insert into #temp (col1)
        select @val
        where not exists (select * from #temp as existing where existing.col1 = @val);
    end try
    begin catch
        -- only error 2627 is tolerated; every other error propagates
        if error_number() <> 2627
            throw;
    end catch
end

-- @val doubles as the insert counter in the report.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('uniques, race safe with try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed, @val) with nowait;
go
-- Benchmark: insert the values 1..100000 into an emptied table; the existence
-- check reads #temp with the updlock and holdlock table hints.
truncate table #temp;

declare @val int, @t0 datetime, @elapsed int;
set @val = 0;
set @t0 = getdate();

while @val < 100000
begin
    set @val = @val + 1;
    insert into #temp (col1)
    select @val
    where not exists (
        select *
        from #temp as existing with (updlock, holdlock)
        where existing.col1 = @val
    );
end

-- @val doubles as the insert counter in the report.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('uniques, race safe with updlock, holdlock: %i milliseconds / %i inserts', -1, -1, @elapsed, @val) with nowait;
go
重复(高表)部分:
-- Benchmark: 100000 guarded inserts of value 1 into the already-populated
-- table (no truncate here), with try/catch that ignores only error 2627.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    begin try
        insert into #temp (col1)
        select @val
        where not exists (select * from #temp as existing where existing.col1 = @val);
    end try
    begin catch
        -- only error 2627 is tolerated; every other error propagates
        if error_number() <> 2627
            throw;
    end catch
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (tall table), race safe with try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
-- Benchmark: 100000 guarded inserts of value 1 into the already-populated
-- table; the existence check reads #temp with the updlock and holdlock hints.
declare @val int, @tries int, @t0 datetime, @elapsed int;
set @val = 1;
set @tries = 0;
set @t0 = getdate();

while @tries < 100000
begin
    set @tries = @tries + 1;
    insert into #temp (col1)
    select @val
    where not exists (
        select *
        from #temp as existing with (updlock, holdlock)
        where existing.col1 = @val
    );
end

-- Report elapsed milliseconds together with the number of attempts.
set @elapsed = datediff(ms, @t0, getdate());
raiserror('duplicates (tall table), race safe with updlock, holdlock: %i milliseconds / %i inserts', -1, -1, @elapsed, @tries) with nowait;
go
答案 5 :(得分:0)
如果您不需要捕获重复项,则始终可以创建一个唯一索引,并将“ignore duplicates”设置为true。 SQL Server将为您解决此问题。