按不重复的行合并两个表的最快方法?

时间:2018-06-26 10:13:00

标签: sql-server tsql

我有两个具有相同列名的表:CookieID (bigint)

  1. 表(Cookies)具有500万条记录
  2. 表(CookiePurpose)拥有1000万条记录

我想创建一个新表,其中只包含#2中存在但#1中不存在的CookieID。

以下方法花费了20分钟以上。

-- Temp table that receives the result of the set difference below.
CREATE TABLE #KnownCookieIDs ([CookieID] [bigint] NOT NULL);

-- Distinct CookieIDs that occur in CookiePurpose but not in Cookies
-- (EXCEPT returns distinct rows from the left query that the right query
-- does not return).
INSERT INTO #KnownCookieIDs ([CookieID])
    SELECT CookieID FROM CookiePurpose
    EXCEPT
    SELECT CookieID FROM Cookies;

有没有更快的方法来实现这一目标?

更新

我正在一个使用S0层计划(10个DTU)的Azure数据库上进行测试。查询耗时很长可能正是这个原因,所以我正在把数据库导出到本地计算机,并将在那里尝试您的建议。

2 个答案:

答案 0 :(得分:1)

此查询应运行得更快:

-- Temp table that receives the CookieIDs found in CookiePurpose but not in
-- Cookies, i.e. the same result as the question's
-- "CookiePurpose EXCEPT Cookies" query.
CREATE TABLE #KnownCookieIDs
(
    [CookieID] [bigint] NOT NULL
);

-- Anti-join: keep the CookiePurpose rows for which the LEFT JOIN finds no
-- matching Cookies row (c.CookieID is NULL only when nothing matched).
-- DISTINCT reproduces EXCEPT's de-duplication in case CookiePurpose holds the
-- same CookieID more than once.
INSERT INTO #KnownCookieIDs ([CookieID])
SELECT DISTINCT
    cp.CookieID
FROM
    CookiePurpose AS cp
    LEFT JOIN Cookies AS c ON c.CookieID = cp.CookieID
WHERE
    c.CookieID IS NULL;

答案 1 :(得分:0)

所以,我决定做一点实验。在大小为OP数据量1/10的数据集上,结果显示子选择的性能优于左联接;不过,我原本预计左联接在更大的数据集上会表现最佳。更新:我建立了一个与OP规模相同的数据集,LEFT JOIN的性能仍然没有超过子选择,但两者非常接近。

确保两个表上都有索引对于提高速度至关重要。因此,请确保您的表已正确索引。

/*
-- One-off setup for the timing batches that follow: (re)creates the Cookie and
-- CookiePurpose sample tables and fills them row by row.
-- Remove the surrounding comment markers to run it.

DROP TABLE Cookie
CREATE TABLE Cookie
(
    [CookieID] [bigint] NOT NULL IDENTITY(1,1) PRIMARY KEY,
    Dummy bit
)

DROP TABLE CookiePurpose
CREATE TABLE CookiePurpose
(
    CookiePurposeID [bigint] NOT NULL IDENTITY(1,1) PRIMARY KEY,
    [CookieID] [bigint] NOT NULL
)
CREATE NONCLUSTERED INDEX IX_CookiePurpose_CookieID ON dbo.CookiePurpose (CookieID)

SET NOCOUNT ON

-- Top Cookie up to 500000 rows; CookieID is assigned by the identity column.
DECLARE @cookieRows BIGINT = (SELECT COUNT(CookieID) FROM Cookie)
WHILE @cookieRows < 500000
BEGIN
    INSERT INTO Cookie(Dummy) SELECT 1;
    SET @cookieRows = @cookieRows + 1;
END

-- Top CookiePurpose up to 1000000 rows, each carrying a random CookieID
-- between 1 and roughly 10^9.
DECLARE @purposeRows BIGINT = (SELECT COUNT(CookieID) FROM CookiePurpose)
WHILE @purposeRows < 1000000
BEGIN
    INSERT INTO CookiePurpose(CookieID)
        SELECT Round(( ( 1000000000 - 1 - 1 ) * Rand() + 1 ), 0);
    SET @purposeRows = @purposeRows + 1;
END

*/

--EXCEPT: 300ms @ 1/10 data; 2917ms for full data
-- Benchmark: collects the distinct CookieIDs present in CookiePurpose but
-- absent from Cookie, using the EXCEPT set operator, and reports the elapsed
-- time in milliseconds.
-- NOTE(review): this variant computes CookiePurpose minus Cookie, while the
-- LEFT JOIN, sub-select and DELETE variants in this script compute Cookie minus
-- CookiePurpose, so the timings are not a like-for-like comparison.
DECLARE @start DATETIME2, @end DATETIME2;
-- SYSDATETIME() returns datetime2; GETDATE() returns datetime, whose ~3.33 ms
-- rounding would otherwise be carried into the DATETIME2 variables.
SET @start = SYSDATETIME();

DECLARE @KnownCookieIDs TABLE ([CookieID] [bigint] NOT NULL);

INSERT INTO @KnownCookieIDs ([CookieID])
    SELECT CookieID FROM CookiePurpose
    EXCEPT
    SELECT CookieID FROM Cookie;

SET @end = SYSDATETIME();
SELECT DATEDIFF(ms, @start, @end) AS [ms elapsed];
GO

--LEFT JOIN: 123ms @ 1/10 data; 1093ms for full data
-- Benchmark: anti-join. Keeps each Cookie row for which the LEFT JOIN finds no
-- CookiePurpose row (cp.CookieID is NULL only when nothing matched), and
-- reports the elapsed time in milliseconds.
-- NOTE(review): this yields Cookie IDs missing from CookiePurpose, the reverse
-- of the EXCEPT variant in this script, which yields CookiePurpose IDs missing
-- from Cookie.
DECLARE @start DATETIME2, @end DATETIME2;
-- SYSDATETIME() returns datetime2; GETDATE() returns datetime, whose ~3.33 ms
-- rounding would otherwise be carried into the DATETIME2 variables.
SET @start = SYSDATETIME();

DECLARE @KnownCookieIDs TABLE ([CookieID] [bigint] NOT NULL);

INSERT INTO @KnownCookieIDs ([CookieID])
    SELECT c.CookieID
    FROM Cookie AS c
    LEFT JOIN CookiePurpose AS cp
        ON cp.CookieID = c.CookieID
    WHERE cp.CookieID IS NULL;

SET @end = SYSDATETIME();
SELECT DATEDIFF(ms, @start, @end) AS [ms elapsed];
GO

--sub-select: 113 ms @ 1/10 data; 1046ms for full data
-- Benchmark: keeps each Cookie row whose CookieID does not appear in
-- CookiePurpose, using a NOT IN sub-select, and reports the elapsed time in
-- milliseconds.
-- NOT IN is kept as measured. It is only safe because CookiePurpose.CookieID is
-- declared NOT NULL in the commented-out setup script; if the sub-select could
-- return a NULL, NOT IN would match no rows at all.
DECLARE @start DATETIME2, @end DATETIME2;
-- SYSDATETIME() returns datetime2; GETDATE() returns datetime, whose ~3.33 ms
-- rounding would otherwise be carried into the DATETIME2 variables.
SET @start = SYSDATETIME();

DECLARE @KnownCookieIDs TABLE ([CookieID] [bigint] NOT NULL);

INSERT INTO @KnownCookieIDs ([CookieID])
    SELECT c.CookieID
    FROM Cookie AS c
    WHERE c.CookieID NOT IN (
        SELECT CookieID FROM CookiePurpose
    );

SET @end = SYSDATETIME();
SELECT DATEDIFF(ms, @start, @end) AS [ms elapsed];
GO


--Delete: 767ms @ 1/10 data; 8450ms for full data
-- Benchmark: two-step variant. Copies every Cookie ID into the table variable,
-- then deletes the ones that have at least one CookiePurpose row, and reports
-- the elapsed time for both steps in milliseconds.
DECLARE @start DATETIME2, @end DATETIME2;
-- SYSDATETIME() returns datetime2; GETDATE() returns datetime, whose ~3.33 ms
-- rounding would otherwise be carried into the DATETIME2 variables.
SET @start = SYSDATETIME();

DECLARE @KnownCookieIDs TABLE ([CookieID] [bigint] NOT NULL);

-- Step 1: start from the full set of Cookie IDs.
INSERT INTO @KnownCookieIDs ([CookieID])
    SELECT c.CookieID
    FROM Cookie AS c;

-- Step 2: remove every ID that is referenced by CookiePurpose.
DELETE known
FROM @KnownCookieIDs AS known
INNER JOIN CookiePurpose AS cp
    ON cp.CookieID = known.CookieID;

SET @end = SYSDATETIME();
SELECT DATEDIFF(ms, @start, @end) AS [ms elapsed];
GO