SQL INSERT但避免重复

时间:2009-11-06 16:17:09

标签: sql sql-server sql-server-2005

我想做一些快速插入,同时避免向表中插入重复数据。为了便于讨论,我们把这张表称为MarketPrices。我一直在尝试两种方法,但不确定如何通过基准测试来判断哪一种更快。

-- Approach 1: insert the parameter row unless an equivalent row already exists.
-- EXCEPT compares whole rows by column position, so the existing rows are cross
-- joined with both values 0 and 1 in the fourth column; that way the fourth
-- column cannot make the parameter row look new when @IsMarketOpen is 0 or 1.
-- The alias IsActive on the right-hand side does not need to match the target
-- column name, because EXCEPT pairs columns by position.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice,  @SellPrice, @IsMarketOpen
EXCEPT
SELECT SecurityCode, BuyPrice, SellPrice, j.bool as IsActive FROM MarketPrices
CROSS JOIN (SELECT 0 as bool UNION SELECT 1 as bool ) as j

OR

-- Approach 2: look up an existing row first and insert only when none is found.
-- NOTE(review): the scalar subquery assumes at most one row matches; SQL Server
-- raises an error when a subquery used as a value returns several rows, so
-- confirm that (SecurityCode, BuyPrice, SellPrice) is unique in MarketPrices.
-- NOTE(review): the lookup and the INSERT are separate statements and no lock
-- hint or transaction is visible here, so two concurrent callers could both
-- see NULL and both insert.
DECLARE @MktId int
SET @MktId = (SELECT SecurityId FROM MarketPrices 
              where SecurityCode = @SecurityCode 
              and BuyPrice=@BuyPrice 
              and SellPrice = @SellPrice)

-- NULL means the lookup found no row with these three values.
IF (@MktId is NULL)  
BEGIN
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES
    (@SecurityCode,@BuyPrice, @SellPrice, @IsMarketOpen)
END

假设@whatever是存储过程中的输入参数。

我希望在BuyPrice或SellPrice(或两者)与该SecurityCode此前出现过的所有记录都不相同时,为该SecurityCode插入一条新记录。我不关心IsMarketOpen。

对于上述任何一种方法,有什么明显的愚蠢之处吗?一个比另一个快吗?

6 个答案:

答案 0 :(得分:11)

编辑:要在并发环境中防止竞争条件(race conditions),请在相关子查询或被EXCEPT的SELECT中使用WITH (UPDLOCK)。我在下面编写的测试脚本不需要它,因为它使用的是仅对当前连接可见的临时表;但在真实环境中对用户表进行操作时,它是必要的。

MERGE不需要UPDLOCK。(不过在并发环境下,MERGE通常仍需要HOLDLOCK才能防止两个会话同时插入相同的键,请以官方文档为准。)


受mcl的答案(建立唯一索引,让数据库抛出错误)启发,我决定将条件插入(conditional insert)与try/catch进行对比。

结果似乎表明条件插入优于try/catch,但具体情况因环境而异(YMMV)。这是一个非常简单的场景(单列、小表等),并且只在一台机器上执行。

以下是结果(SQL Server 2008,版本10.0.1600.2):

duplicates (short table)    
  try/catch:                14440 milliseconds / 100000 inserts
  conditional insert:        2983 milliseconds / 100000 inserts
  except:                    2966 milliseconds / 100000 inserts
  merge:                     2983 milliseconds / 100000 inserts

uniques
  try/catch:                 3920 milliseconds / 100000 inserts
  conditional insert:        3860 milliseconds / 100000 inserts
  except:                    3873 milliseconds / 100000 inserts
  merge:                     3890 milliseconds / 100000 inserts

  straight insert:           3173 milliseconds / 100000 inserts

duplicates (tall table)
  try/catch:                14436 milliseconds / 100000 inserts
  conditional insert:        3063 milliseconds / 100000 inserts
  except:                    3063 milliseconds / 100000 inserts
  merge:                     3030 milliseconds / 100000 inserts

请注意,即使插入的全是唯一值,try/catch的开销也略高于条件插入。我想知道这是否因版本、CPU、内核数量等而异。

我没有对IF形式的条件插入进行基准测试,只测试了WHERE形式。我估计IF形式的开销会更大,因为a)你会有两条语句,并且b)你需要将这两条语句包装在一个事务中,并将隔离级别设置为serializable(!)。如果有人想测试它,则需要将临时表改为常规用户表(serializable不适用于本地临时表)。

这是脚本:

-- Benchmark setup. Written for SQL Server 2008; on SQL Server 2005 the
-- statements that use MERGE have to be commented out.
set nocount on

-- Start clean: remove any #temp table that already exists.
if object_id('tempdb..#temp') is not null
begin
  drop table #temp
end

-- One-column table; the primary key is what rejects duplicate values.
create table #temp (
  col1 int primary key
)
go

-------------------------------------------------------

-- duplicate insert test against a table w/ 1 record

-------------------------------------------------------

-- Seed the table with the single row the attempts below try to insert again.
insert into #temp (col1) values (1)
go

-- Scenario: duplicates (short table), try/catch.
-- Runs the same plain insert 100000 times; the empty CATCH block discards
-- whatever error an attempt raises. Reports the elapsed milliseconds.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  begin try
    insert #temp select @value
  end try
  begin catch
  end catch
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (short table), try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-- Scenario: duplicates (short table), conditional insert.
-- Each of the 100000 attempts inserts the value only when NOT EXISTS finds no
-- row already holding it. Reports the elapsed milliseconds.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  insert #temp
  select @value
  where not exists (select * from #temp where col1 = @value)
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (short table), conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-- Scenario: duplicates (short table), except.
-- Each of the 100000 attempts inserts the value minus (EXCEPT) every value
-- already stored in #temp. Reports the elapsed milliseconds.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  insert #temp
  select @value
  except
  select col1 from #temp
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (short table), except: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-- comment this batch out for SQL 2005
-- Scenario: duplicates (short table), merge.
-- Each of the 100000 attempts merges a one-row source into #temp and inserts
-- only when the target has no matching col1. Reports the elapsed milliseconds.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  merge #temp t
  using (select @value) s (col1)
    on t.col1 = s.col1
  when not matched by target then
    insert values (col1);
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (short table), merge: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-------------------------------------------------------

-- unique insert test against an initially empty table

-------------------------------------------------------

-- Scenario: uniques, straight insert (baseline without any duplicate handling).
-- Empties #temp, then inserts the values 1..100000 one at a time.
truncate table #temp
declare @next int, @started datetime, @elapsed_ms int
set @next = 0
set @started = getdate()
while @next < 100000
begin
  set @next = @next + 1
  insert #temp select @next
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('uniques, straight insert: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next) with nowait
go

-- Scenario: uniques, try/catch.
-- Empties #temp, then inserts the values 1..100000 one at a time inside
-- TRY/CATCH; the empty CATCH block discards any error an insert raises.
truncate table #temp
declare @next int, @started datetime, @elapsed_ms int
set @next = 0
set @started = getdate()
while @next < 100000
begin
  set @next = @next + 1
  begin try
    insert #temp select @next
  end try
  begin catch
  end catch
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('uniques, try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next) with nowait
go

-- Scenario: uniques, conditional insert.
-- Empties #temp, then inserts the values 1..100000 one at a time, each guarded
-- by a NOT EXISTS check on the same value.
truncate table #temp
declare @next int, @started datetime, @elapsed_ms int
set @next = 0
set @started = getdate()
while @next < 100000
begin
  set @next = @next + 1
  insert #temp
  select @next
  where not exists (select * from #temp where col1 = @next)
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('uniques, conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next) with nowait
go

-- Scenario: uniques, except.
-- Empties #temp, then inserts the values 1..100000 one at a time, each minus
-- (EXCEPT) the values already stored in #temp.
truncate table #temp
declare @next int, @started datetime, @elapsed_ms int
set @next = 0
set @started = getdate()
while @next < 100000
begin
  set @next = @next + 1
  insert #temp
  select @next
  except
  select col1 from #temp
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('uniques, except: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next) with nowait
go

-- comment this batch out for SQL 2005
-- Scenario: uniques, merge.
-- Empties #temp, then merges the values 1..100000 one at a time, inserting
-- each value when the target has no matching col1.
-- The counter starts at 0, like the other uniques batches: it is incremented
-- before each MERGE, so starting at 1 would skip value 1 and run only 99999
-- merges while the message below still reported 100000.
truncate table #temp
declare @x int, @now datetime, @duration int
select @x = 0, @now = getdate()
while @x < 100000 begin
  set @x = @x+1
  merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
end
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, merge: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
go

-------------------------------------------------------

-- duplicate insert test against a table w/ 100000 records

-------------------------------------------------------

-- Scenario: duplicates (tall table), try/catch.
-- Runs the same plain insert of value 1 100000 times against #temp as left by
-- the preceding batch; the empty CATCH block discards any error raised.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  begin try
    insert #temp select @value
  end try
  begin catch
  end catch
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (tall table), try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-- Scenario: duplicates (tall table), conditional insert.
-- Each of the 100000 attempts inserts value 1 only when NOT EXISTS finds no
-- row already holding it. Reports the elapsed milliseconds.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  insert #temp
  select @value
  where not exists (select * from #temp where col1 = @value)
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (tall table), conditional insert: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-- Scenario: duplicates (tall table), except.
-- Each of the 100000 attempts inserts value 1 minus (EXCEPT) every value
-- already stored in #temp. Reports the elapsed milliseconds.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  insert #temp
  select @value
  except
  select col1 from #temp
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (tall table), except: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-- comment this batch out for SQL 2005
-- Scenario: duplicates (tall table), merge.
-- Each of the 100000 attempts merges a one-row source holding value 1 into
-- #temp and inserts only when the target has no matching col1.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  merge #temp t
  using (select @value) s (col1)
    on t.col1 = s.col1
  when not matched by target then
    insert values (col1);
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (tall table), merge: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

答案 1 :(得分:6)

编辑:要在并发环境中阻止race conditions,请在相关子查询中使用WITH (UPDLOCK)


我认为这将是标准方法:

-- Insert the parameter row only when no row with the same
-- (SecurityCode, BuyPrice, SellPrice) exists; IsMarketOpen is not compared.
-- UPDLOCK alone only locks rows the check actually finds. HOLDLOCK adds
-- serializable behaviour for this table reference, so a concurrent session
-- cannot insert the same values between the existence check and the insert.
-- The "=" comparisons never match NULL; nullable columns need explicit
-- IS NULL handling in the WHERE clause.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen
WHERE NOT EXISTS (
  SELECT *
  FROM MarketPrices WITH (UPDLOCK, HOLDLOCK)
  WHERE SecurityCode = @SecurityCode
    AND BuyPrice = @BuyPrice
    AND SellPrice = @SellPrice
  )

如果您的任何字段可以为空,则必须将其添加到条件中。

你的第一种方法很有意思,但EXCEPT的要求让你不得不大费周章。这种方法本质上相同,但可以绕开列匹配的问题。

或者:

-- Alternative using EXCEPT: the parameter triple survives the EXCEPT only when
-- no existing row has the same (SecurityCode, BuyPrice, SellPrice), and
-- @IsMarketOpen is attached afterwards so it takes no part in the comparison.
-- EXCEPT treats two NULLs as equal, so nullable columns need no extra handling.
-- HOLDLOCK is combined with UPDLOCK so that a concurrent session cannot insert
-- the same triple between the comparison and the insert.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT SecurityCode, BuyPrice, SellPrice, @IsMarketOpen
FROM (
  SELECT @SecurityCode, @BuyPrice,  @SellPrice
  EXCEPT
  SELECT SecurityCode, BuyPrice, SellPrice FROM MarketPrices WITH (UPDLOCK, HOLDLOCK)
  ) a (SecurityCode, BuyPrice, SellPrice)

在这种情况下,EXCEPT的好处是它可以处理NULL,而无需您编写任何额外的代码。要在第一个示例中实现相同的效果,您需要手动逐对地同时检查NULL和相等性。

你的第二种方法没问题,但你不需要那个变量。请参阅Tomalak的解决方案,他把它整理得很好。此外,如果并发是个问题,您需要显式处理并发插入的可能性。

答案 2 :(得分:3)

我任何时候都会选择语义清晰的方案。你的两个方案在我看来都相当晦涩(虽然后者比前者好)。

-- Guarded insert: add the parameter row only when no row with the same
-- (SecurityCode, BuyPrice, SellPrice) is found; IsMarketOpen is not compared.
-- NOTE(review): the check and the insert are separate statements with no lock
-- hints or transaction visible here, so concurrent callers may both insert.
IF NOT EXISTS (
  SELECT *
  FROM MarketPrices
  WHERE SecurityCode = @SecurityCode
    AND BuyPrice = @BuyPrice
    AND SellPrice = @SellPrice
)
  INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
  VALUES (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)

如果在SecurityCode, BuyPrice, SellPrice上建有复合索引,EXISTS查询应该会相当快。

我认为,基准测试只需对一个WHILE循环计时即可。自己测试一下,亲眼看看结果。

答案 3 :(得分:2)

另一种选择:在相关字段(SecurityCode,BuyPrice,SellPrice)上创建唯一索引,直接执行简单的插入,让数据库来判断记录是否重复。尝试插入重复记录时,插入会失败。

使用代码(无论是外部语言还是SQL存储过程)来保证唯一性不够严格,最终仍会放进您想要阻止的重复项。

答案 4 :(得分:1)

下面我把“Only inserting a row if it's not already there”一问中的最佳答案加入到Peter Radocchia的优秀答案的基准测试中。

需要注意的是:在没有实际冲突时(即您预计冲突非常罕见,对应uniques场景),race safe with try/catch技术比race safe with updlock, holdlock技术略快(约1%);而在总是发生冲突时(对应duplicates场景),它要慢一些(约20%)。这里没有考虑锁升级等复杂因素。

以下是结果(SQL Server 2014,版本12.0.2000.8):

duplicates (short table)    
  try/catch:                       15546 milliseconds / 100000 inserts
  conditional insert:               1460 milliseconds / 100000 inserts
  except:                           1490 milliseconds / 100000 inserts
  merge:                            1420 milliseconds / 100000 inserts
  race safe with try/catch:         1650 milliseconds / 100000 inserts
  race safe with updlock, holdlock: 1330 milliseconds / 100000 inserts

uniques
  try/catch:                        2266 milliseconds / 100000 inserts
  conditional insert:               2156 milliseconds / 100000 inserts
  except:                           2273 milliseconds / 100000 inserts
  merge:                            2136 milliseconds / 100000 inserts
  race safe with try/catch:         2400 milliseconds / 100000 inserts
  race safe with updlock, holdlock: 2430 milliseconds / 100000 inserts

  straight insert:                  1686 milliseconds / 100000 inserts

duplicates (tall table)
  try/catch:                       15826 milliseconds / 100000 inserts
  conditional insert:               1530 milliseconds / 100000 inserts
  except:                           1506 milliseconds / 100000 inserts
  merge:                            1443 milliseconds / 100000 inserts
  race safe with try/catch:         1636 milliseconds / 100000 inserts
  race safe with updlock, holdlock: 1426 milliseconds / 100000 inserts

重复(短表)部分:

-- Scenario: duplicates (short table), race safe with try/catch.
-- Each of the 100000 attempts is a NOT EXISTS-guarded insert wrapped in
-- TRY/CATCH. Error 2627 (duplicate key on a constraint) is ignored; any other
-- error is re-raised.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  begin try
    insert #temp
    select @value
    where not exists (select * from #temp where col1 = @value)
  end try
  begin catch
    if error_number() <> 2627
      throw
  end catch
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (short table), race safe with try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-- Scenario: duplicates (short table), race safe with updlock, holdlock.
-- Each of the 100000 attempts is a NOT EXISTS-guarded insert whose existence
-- check reads #temp with the UPDLOCK and HOLDLOCK table hints.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  insert #temp
  select @value
  where not exists (
    select * from #temp with (updlock, holdlock) where col1 = @value
  )
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (short table), race safe with updlock, holdlock: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

唯一值(uniques)部分:

-- Scenario: uniques, race safe with try/catch.
-- Empties #temp, then inserts the values 1..100000 one at a time with a
-- NOT EXISTS guard inside TRY/CATCH. Error 2627 (duplicate key on a
-- constraint) is ignored; any other error is re-raised.
truncate table #temp
declare @next int, @started datetime, @elapsed_ms int
set @next = 0
set @started = getdate()
while @next < 100000
begin
  set @next = @next + 1
  begin try
    insert #temp
    select @next
    where not exists (select * from #temp where col1 = @next)
  end try
  begin catch
    if error_number() <> 2627
      throw
  end catch
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('uniques, race safe with try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next) with nowait
go

-- Scenario: uniques, race safe with updlock, holdlock.
-- Empties #temp, then inserts the values 1..100000 one at a time; each
-- existence check reads #temp with the UPDLOCK and HOLDLOCK table hints.
truncate table #temp
declare @next int, @started datetime, @elapsed_ms int
set @next = 0
set @started = getdate()
while @next < 100000
begin
  set @next = @next + 1
  insert #temp
  select @next
  where not exists (
    select * from #temp with (updlock, holdlock) where col1 = @next
  )
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('uniques, race safe with updlock, holdlock: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @next) with nowait
go

重复(高表)部分:

-- Scenario: duplicates (tall table), race safe with try/catch.
-- Each of the 100000 attempts is a NOT EXISTS-guarded insert of value 1
-- wrapped in TRY/CATCH. Error 2627 (duplicate key on a constraint) is
-- ignored; any other error is re-raised.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  begin try
    insert #temp
    select @value
    where not exists (select * from #temp where col1 = @value)
  end try
  begin catch
    if error_number() <> 2627
      throw
  end catch
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (tall table), race safe with try/catch: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

-- Scenario: duplicates (tall table), race safe with updlock, holdlock.
-- Each of the 100000 attempts is a NOT EXISTS-guarded insert of value 1 whose
-- existence check reads #temp with the UPDLOCK and HOLDLOCK table hints.
declare @value int, @attempts int, @started datetime, @elapsed_ms int
set @value = 1
set @attempts = 0
set @started = getdate()
while @attempts < 100000
begin
  set @attempts = @attempts + 1
  insert #temp
  select @value
  where not exists (
    select * from #temp with (updlock, holdlock) where col1 = @value
  )
end
set @elapsed_ms = datediff(millisecond, @started, getdate())
raiserror('duplicates (tall table), race safe with updlock, holdlock: %i milliseconds / %i inserts', -1, -1, @elapsed_ms, @attempts) with nowait
go

答案 5 :(得分:0)

如果您不需要捕获重复项,可以创建一个唯一索引,并把“ignore duplicates”选项设置为true(在SQL Server中即IGNORE_DUP_KEY = ON)。SQL Server会替您处理重复项。