为什么我的 CTE 联接(JOIN)更新比联接表变量的更新慢得多?

时间:2016-09-27 18:00:36

标签: sql-server tsql sql-server-2014 table-variable

我见过几个类似的帖子,但它们似乎都是针对大型数据库的。今天早上我在一个小型生产数据库中遇到这个问题后,创建了一组虚拟数据来演示它。

数据背景如下:一家公司跟踪其100个客户的股票投资组合。1000只股票中的每一只都有每日记录,列出持有它的四个投资者及各自的持股比例。不幸的是,系统有一个小故障,允许同一所有者在一条记录中多次出现。处理过程会解析数据并把记录拆分开,使每只股票每天有4条记录,然后为每个所有者汇总其投资组合总额。但是,由于同一所有者可能对应多条记录,这会夸大该所有者的持仓价值。因此,需要插入一个标志来标识这些重复项。在后面的代码中,每行的值都会乘以该标志:重复行的标志为0,非重复行的标志为1。

我有五种更新该标志的方法。方法0只是作为基线,使用带SELECT语句的CTE找出重复项,大约需要0.07秒。方法1通过联接(JOIN)到CTE来更新表,大约需要48秒。方法2用嵌套的SELECT语句代替CTE,大约需要48秒。方法3先把CTE的结果转储到表变量,再联接该表变量进行更新,大约需要0.13秒。方法4我原本以为效率最低,因为它使用计数循环、一次只更新一行,但只用了0.17秒。方法5联接到CTE,并用CASE语句更新所有行,大约需要48秒。

-- Wide source rows: one record per stock per trade date, holding four owner
-- slots (Owner1..Owner4) with each slot's fractional share next to it.
DECLARE @OwnRec TABLE (
      StockID   INT
    , TradeDate DATE
    , Shares    DECIMAL(4,0)
    , Price     DECIMAL(4,2)
    , Owner1    INT, Owner1Pct DECIMAL(3,2)
    , Owner2    INT, Owner2Pct DECIMAL(3,2)
    , Owner3    INT, Owner3Pct DECIMAL(3,2)
    , Owner4    INT, Owner4Pct DECIMAL(3,2)
    );

-- Narrow rows: one record per owner slot of each @OwnRec row.
-- OwnerNum records which slot (1-4) the row came from; DupeOwner is the
-- flag the timed methods below update.
DECLARE @OwnRec2 TABLE (
      RecID     INT IDENTITY(1,1)
    , StockID   INT
    , TradeDate DATE
    , Shares    DECIMAL(4,0)
    , Price     DECIMAL(4,2)
    , Owner0    INT
    , Owner0Pct DECIMAL(3,2)
    , OwnerNum  INT
    , DupeOwner TINYINT
    );

-- Work list of @OwnRec2.RecID values identified as duplicates; ID gives the
-- loop-based method a counter to walk.
DECLARE @CullDupe TABLE (
      ID    INT IDENTITY(1,1)
    , RecID INT
    );

DECLARE @Method    INT;       -- which update strategy the timed section runs
DECLARE @Counter1  INT = 0;   -- shared loop counter
DECLARE @StartTime DATETIME;  -- captured just before the timed section

--Populate tables with dummy data
-- Day 1 (2016-09-26): one randomly generated row per StockID 1..1000.
-- A row-at-a-time loop is used so that every RAND() call yields a fresh value.
WHILE @Counter1 < 1000
BEGIN
    SET @Counter1 = @Counter1 + 1;

    INSERT INTO @OwnRec
        ( StockID, TradeDate, Shares, Price
        , Owner1, Owner1Pct
        , Owner2, Owner2Pct
        , Owner3, Owner3Pct
        , Owner4, Owner4Pct
        )
    VALUES
        ( @Counter1
        , '2016-09-26'
        , ROUND((RAND() * 1000 + 500)/25,0)*25                      -- share count: a multiple of 25 in 500..1500
        , ROUND((RAND() * 30 + 20),2)                               -- price in 20.00..50.00
        , ROUND((RAND() * 100 + .5),0)                              -- owner id in 1..100
        , CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))      -- share fraction: .05 steps up to .25
        , ROUND((RAND() * 100 + .5),0)
        , CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
        , ROUND((RAND() * 100 + .5),0)
        , CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
        , ROUND((RAND() * 100 + .5),0)
        , CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
        );
END

SET @Counter1 = 0

-- Day 2 (2016-09-27): copy each day-1 row (StockID 1..1000) into a new row
-- with StockID + 1000, keeping the same shares and owners and applying a
-- small random price change.
WHILE @Counter1 < 1000
    BEGIN
        SET @Counter1 += 1
        INSERT INTO @OwnRec (
              StockID
            , TradeDate
            , Shares
            , Price
            , Owner1
            , Owner1Pct
            , Owner2
            , Owner2Pct
            , Owner3
            , Owner3Pct
            , Owner4
            , Owner4Pct
            )
        SELECT @Counter1 + 1000
            , '2016-09-27'
            , Shares
            -- New price is 96%..105% of the day-1 price. The parentheses are
            -- required: without them "*" binds tighter than "+", so the
            -- expression becomes (Price * k * .01) + .95, i.e. only a few
            -- percent of the original price plus 0.95.
            , ROUND(Price * (ROUND(RAND()*10 + .5,0)*.01 + .95), 2)
            , Owner1
            , Owner1Pct
            , Owner2
            , Owner2Pct
            , Owner3
            , Owner3Pct
            , Owner4
            , Owner4Pct
            FROM @OwnRec WHERE StockID = @Counter1
    END

-- When the same owner id occupies two slots of one row, copy the
-- lower-numbered slot's percentage into the higher-numbered slot.
-- The statements run in this order on purpose: later ones read values that
-- earlier ones may already have overwritten.

-- slot 1 -> slots 2, 3, 4
UPDATE @OwnRec SET Owner2Pct = Owner1Pct WHERE Owner1 = Owner2;
UPDATE @OwnRec SET Owner3Pct = Owner1Pct WHERE Owner1 = Owner3;
UPDATE @OwnRec SET Owner4Pct = Owner1Pct WHERE Owner1 = Owner4;

-- slot 2 -> slots 3, 4
UPDATE @OwnRec SET Owner3Pct = Owner2Pct WHERE Owner2 = Owner3;
UPDATE @OwnRec SET Owner4Pct = Owner2Pct WHERE Owner2 = Owner4;

-- slot 3 -> slot 4
UPDATE @OwnRec SET Owner4Pct = Owner3Pct WHERE Owner3 = Owner4;

-- Unpivot the four owner slots of every @OwnRec row into one @OwnRec2 row per
-- slot. OwnerNum records the source slot; DupeOwner starts at 1 for every row.
-- The target column list is explicit so the mapping does not depend on column
-- position, and each SELECT column is aliased to the column it feeds.
-- UNION ALL is sufficient: the branches differ in OwnerNum, so they cannot
-- produce rows that are duplicates of each other.
INSERT INTO @OwnRec2 (StockID, TradeDate, Shares, Price, Owner0, Owner0Pct, OwnerNum, DupeOwner)
    SELECT StockID, TradeDate, Shares, Price, Owner1 AS Owner0, Owner1Pct AS Owner0Pct, 1 AS OwnerNum, 1 AS DupeOwner
        FROM @OwnRec
    UNION ALL
    SELECT StockID, TradeDate, Shares, Price, Owner2 AS Owner0, Owner2Pct AS Owner0Pct, 2 AS OwnerNum, 1 AS DupeOwner
        FROM @OwnRec
    UNION ALL
    SELECT StockID, TradeDate, Shares, Price, Owner3 AS Owner0, Owner3Pct AS Owner0Pct, 3 AS OwnerNum, 1 AS DupeOwner
        FROM @OwnRec
    UNION ALL
    SELECT StockID, TradeDate, Shares, Price, Owner4 AS Owner0, Owner4Pct AS Owner0Pct, 4 AS OwnerNum, 1 AS DupeOwner
        FROM @OwnRec
--END Populate tables with dummy data

-- Start the clock and choose which method to test.
-- Exactly one of the IF @Method = n sections below will run.
SELECT @StartTime = GETDATE()
     , @Method    = 5;


--CASE 0: Just identify duplicates
-- Baseline: number the rows within each (StockID, TradeDate, Owner0) group
-- by OwnerNum and return every row after the first. Nothing is updated.

IF @Method = 0
BEGIN
    ; WITH RankedOwners (RecID, rn) AS
    (
        SELECT RecID
             , ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum)
        FROM @OwnRec2
    )
    SELECT ro.RecID, ro.rn
    FROM RankedOwners AS ro
    WHERE ro.rn > 1;
END


--CASE 1: Update on JOIN to CTE
-- Clears DupeOwner on every second-or-later occurrence of an owner within a
-- (StockID, TradeDate) group, by joining the target directly to the ranking CTE.

IF @Method = 1
BEGIN
    ; WITH RankedOwners (RecID, rn) AS
    (
        SELECT RecID
             , ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum)
        FROM @OwnRec2
    )
    UPDATE tgt
    SET    DupeOwner = 0
    FROM   @OwnRec2 AS tgt
    INNER JOIN RankedOwners AS ro
            ON tgt.RecID = ro.RecID
    WHERE  ro.rn > 1;
END


--CASE 2: Update on JOIN to nested SELECT
-- Same update as the CTE variant, but the ranking query is written inline as
-- a derived table. That form is what this method measures, so it is kept.

IF @Method = 2
BEGIN
    UPDATE tgt
    SET    DupeOwner = 0
    FROM   @OwnRec2 AS tgt
    INNER JOIN (
                SELECT RecID
                     , ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
                FROM @OwnRec2
               ) AS ro
            ON tgt.RecID = ro.RecID
    WHERE  ro.rn > 1;
END


--CASE 3: Update on JOIN to temp table
-- (The "temp table" is the @CullDupe table variable, not a #temp table.)
-- Step 1 stores the RecIDs of the duplicate rows once; step 2 joins the
-- target to that stored list instead of to the ranking query itself.

IF @Method = 3
BEGIN
    -- Step 1: materialise the duplicate RecIDs
    ; WITH RankedOwners (RecID, rn) AS
    (
        SELECT RecID
             , ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum)
        FROM @OwnRec2
    )
    INSERT INTO @CullDupe (RecID)
    SELECT ro.RecID
    FROM   RankedOwners AS ro
    WHERE  ro.rn > 1;

    -- Step 2: clear the flag on exactly those rows
    UPDATE tgt
    SET    DupeOwner = 0
    FROM   @OwnRec2 AS tgt
    INNER JOIN @CullDupe AS dup
            ON tgt.RecID = dup.RecID;
END


--CASE 4: Update using counted loop
-- Stores the duplicate RecIDs in @CullDupe, then clears the flag one row per
-- iteration by walking @CullDupe.ID from 1 up to MAX(ID).
-- NOTE(review): assumes @CullDupe is empty beforehand so its IDENTITY values
-- run 1..MAX(ID) without gaps; an ID with no row simply updates nothing.

IF @Method = 4
BEGIN
    ; WITH RankedOwners (RecID, rn) AS
    (
        SELECT RecID
             , ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum)
        FROM @OwnRec2
    )
    INSERT INTO @CullDupe (RecID)
    SELECT ro.RecID
    FROM   RankedOwners AS ro
    WHERE  ro.rn > 1;

    SET @Counter1 = 0;

    -- If @CullDupe is empty, MAX(ID) is NULL and the loop body never runs.
    WHILE @Counter1 < (SELECT MAX(ID) FROM @CullDupe)
    BEGIN
        SET @Counter1 = @Counter1 + 1;

        UPDATE tgt
        SET    DupeOwner = 0
        FROM   @OwnRec2 AS tgt
        WHERE  tgt.RecID = (SELECT dup.RecID FROM @CullDupe AS dup WHERE dup.ID = @Counter1);
    END
END


--CASE 5: Update using JOIN to CTE, but updating all rows (CASE to identify)
-- Rewrites DupeOwner on every row in one pass: the first occurrence of an
-- owner within a (StockID, TradeDate) group gets 1, later occurrences get 0.

IF @Method = 5
BEGIN
    ; WITH RankedOwners (RecID, rn) AS
    (
        SELECT RecID
             , ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum)
        FROM @OwnRec2
    )
    UPDATE tgt
    SET    DupeOwner = CASE WHEN ro.rn > 1 THEN 0 ELSE 1 END
    FROM   @OwnRec2 AS tgt
    INNER JOIN RankedOwners AS ro
            ON tgt.RecID = ro.RecID;
END

-- Report which method ran and how long it took since @StartTime was captured.
SELECT 'Method '
     + CONVERT(NVARCHAR(1), @Method)
     + ': '
     + CONVERT(NVARCHAR(10), DATEDIFF(millisecond, @StartTime, GETDATE()))
     + ' milliseconds'

1 个答案:

答案 0 :(得分:2)

这是表变量的常见问题。

引用它们的语句的执行计划在批处理开始执行之前编译,因此在执行insert语句之前编译。

如果您选中某个有问题的执行计划并查看属性窗口,会看到表基数(Table Cardinality)为0。

enter image description here

尽管如此,它仍然假设从空表中发出1行,因为这是大多数情况下执行计划中叶子操作符的最小行估计。嵌套循环内部的子树对驱动表中的每一行执行一次。由于估计这是1行,因此估计下面突出显示的子树执行一次。事实上,整个子树将执行8000次(包括昂贵的表扫描和排序操作符)。

enter image description here

当您把行编号的结果物化(存入)到表变量中时,就保存了该子树的结果,从而确保它只被计算一次(不过,使用该表变量的计划在联接这个新表变量时,仍然采用了次优的嵌套循环联接)。

针对这种单行估计问题,常见的解决方案有:在有问题的语句中添加 OPTION (RECOMPILE),以便在语句执行时考虑表的实际基数;或者使用跟踪标志2453(可以在基数发生变化后触发自动重新编译);或者改用 #temp 表(可以触发自动重新编译,并能从列统计信息中获益)。

有关其中一些内容的更多详细信息,请参见我在这里的回答。