SQL Server遍历数百万行的更好方法

时间:2019-08-21 13:55:10

标签: sql sql-server sap database-cursor

我正在使用SAP时间表数据,因此有数百万行。我正在尝试从SAP表中选择数据并将其插入MS SQL Server上的表中。

所以我想插入原始记录,然后如果发生对原始记录的更新(以带有refcounter的新SAP记录的形式出现),我想在表中查找原始记录并对其进行更新,并保留原始计数器值。

我已经用游标成功完成了这项任务(我不确定这是不是最好的办法),但由于有数百万条记录,而游标已经运行到第4天了,我想知道是否有更快的方法。有没有比下面更好的做法?

-- Demo setup: builds the SAP timesheet source table (CATSDB) with sample rows,
-- the counter-chain lookup table (TBL_COUNTER) and the destination table (TEMP).
BEGIN
    -- Source rows as extracted from SAP. REFCOUNTER is NULL for an original
    -- record and holds the COUNTER of the record it replaces for an update.
    CREATE TABLE CATSDB
        (
            [COUNTER] nvarchar(12),
            REFCOUNTER nvarchar(12),
            PERNR nvarchar(8),
            WORKDATE nvarchar(8),
            CATSHOURS decimal(7, 3),
            APDAT nvarchar(8),
            LAETM nvarchar(6),
            CATS_STATUS nvarchar(2),
            APPR_STATUS nvarchar(2)
        )

    INSERT INTO CATSDB
            (
                [COUNTER],REFCOUNTER,PERNR,WORKDATE,CATSHOURS,APDAT,LAETM,CATS_STATUS,APPR_STATUS
            )
        VALUES
            ('000421692670',NULL,'00000071','20190114','6.00','20190204','174541','30','30'),
            ('000421692671',NULL,'00000071','20190114','3.00','20190204','174541','30','30'),
            ('000421692672',NULL,'00000071','20190115','6.00','00000000','000000','60','20'),
            ('000421692673',NULL,'00000071','20190115','3.00','00000000','000000','60','20'),
            ('000421692712','000421692672','00000071','20190115','0.00','20190115','111007','30','30'),
            ('000421692713','000421692673','00000071','20190115','0.00','20190115','111007','30','30'),
            ('000429718015',NULL,'00000072','20190313','7.00','00000000','000000','60','20'),
            ('000429718016',NULL,'00000072','20190313','1.50','20190315','164659','30','30'),
            ('000429718017',NULL,'00000072','20190313','1.00','20190315','164659','30','30'),
            ('000430154143',NULL,'00000072','20190313','2.00','00000000','000000','60','20'),
            ('000430154142','000429718015','00000072','20190313','5.00','00000000','000000','60','20'),
            ('000430154928','000430154142','00000072','20190313','4.50','20190315','164659','30','30'),
            ('000430154929','000430154143','00000072','20190313','2.50','20190315','164659','30','30'),
            ('000429774620',NULL,'00000152','20190314','1.00','00000000','000000','60','20'),
            ('000429774619',NULL,'00000152','20190314','1.00','00000000','000000','60','20'),
            ('000429802106','000429774620','00000152','20190314','2.00','00000000','000000','60','20'),
            ('000429802105','000429774619','00000152','20190314','3.00','00000000','000000','60','20'),
            ('000429840242','000429802106','00000152','20190314','4.00','20190315','143857','30','30'),
            ('000429840241','000429802105','00000152','20190314','5.00','20190315','143857','30','30')

    -- COUNTER -> REFCOUNTER links for every record seen so far.
    -- Columns are nvarchar(12) (previously varchar) so they have the same type
    -- as CATSDB, TEMP and the nvarchar variables they are compared with; a
    -- varchar column compared with an nvarchar value is implicitly converted
    -- row by row. The primary key turns the per-row COUNTER lookups into seeks.
    CREATE TABLE [TBL_COUNTER]
        (
            [COUNTER] [nvarchar](12) NOT NULL CONSTRAINT PK_TBL_COUNTER PRIMARY KEY CLUSTERED,
            [REFCOUNTER] [nvarchar](12) NULL
        )

    -- Destination: one row per original COUNTER, carrying the latest values.
    -- The primary key backs the "WHERE [COUNTER] = ..." existence checks and updates.
    CREATE TABLE TEMP
        (
            [COUNTER] [nvarchar](12) NOT NULL CONSTRAINT PK_TEMP PRIMARY KEY CLUSTERED,
            [REFCOUNTER] [nvarchar](12) NULL,
            [PERNR] [nvarchar](8) NULL,
            [WORKDATE] [nvarchar](8) NULL,
            [CATSHOURS] [decimal](7, 3) NULL,
            [APDAT] [nvarchar](8) NULL,
            [LAETM] [nvarchar](6) NULL,
            [CATS_STATUS] [nvarchar](2) NULL,
            [APPR_STATUS] [nvarchar](2) NULL
        )
END

-- Holders for the column values of the CATSDB row currently being processed.
DECLARE @COUNTER     nvarchar(12);
DECLARE @REFCOUNTER  nvarchar(12);
DECLARE @PERNR       nvarchar(8);
DECLARE @WORKDATE    nvarchar(8);
DECLARE @CATSHOURS   decimal(7, 3);
DECLARE @APDAT       nvarchar(8);
DECLARE @LAETM       nvarchar(6);
DECLARE @CATS_STATUS nvarchar(2);
DECLARE @APPR_STATUS nvarchar(2);

-- COUNTER of the first record in the change chain of the current row.
DECLARE @orig_counter nvarchar(12);

-- Cursor over every CATSDB row.
-- LOCAL limits the cursor name to this batch; FAST_FORWARD makes it a
-- forward-only, read-only cursor, which is all the loop needs (it only
-- fetches forward and never updates through the cursor) and is cheaper than
-- the default cursor options.
-- NOTE(review): no ORDER BY is specified, so the fetch order is not
-- guaranteed; the processing loop expects an original record to be seen
-- before the records that reference it -- confirm whether an explicit
-- ordering is required for the real data.
DECLARE curs CURSOR LOCAL FAST_FORWARD FOR
    SELECT
            [COUNTER],
            REFCOUNTER,
            PERNR,
            WORKDATE,
            CATSHOURS,
            APDAT,
            LAETM,
            CATS_STATUS,
            APPR_STATUS
    FROM
            CATSDB

-- Open the cursor and read the first row so that @@FETCH_STATUS is set
-- before the processing loop tests it.
OPEN curs;

FETCH NEXT FROM curs
INTO @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
     @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;

-- Row-by-row merge of CATSDB into TEMP.
--   * Every row's COUNTER/REFCOUNTER pair is recorded in TBL_COUNTER.
--   * A row without REFCOUNTER is inserted into TEMP as a new original record.
--   * A row with REFCOUNTER is an update: its chain of REFCOUNTER links is
--     followed back to the original record, and that TEMP row is overwritten
--     with the new values while keeping the original COUNTER.
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Remember the link so later updates can be traced back through this row.
    IF NOT EXISTS (SELECT * FROM TBL_COUNTER WHERE [COUNTER] = @COUNTER)
    BEGIN
        INSERT INTO TBL_COUNTER ([COUNTER], REFCOUNTER)
        VALUES (@COUNTER, @REFCOUNTER)
    END

    IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = @COUNTER)
    BEGIN
        -- If REFCOUNTER is populated, get the original COUNTER value, then update
        -- that row with the new values. Otherwise insert a new record.
        IF @REFCOUNTER <> '' AND @REFCOUNTER IS NOT NULL
        BEGIN
            -- Reset first: a SELECT that returns no row leaves the variable
            -- untouched, so without this the previous row's original counter
            -- would be reused and the wrong TEMP row would be updated.
            SET @orig_counter = NULL;

            WITH n([COUNTER], REFCOUNTER) AS
                (
                    -- Anchor: the record this row directly replaces.
                    SELECT
                            cnt.[COUNTER],
                            cnt.REFCOUNTER
                    FROM
                            TBL_COUNTER AS cnt
                    WHERE
                            cnt.[COUNTER] = @REFCOUNTER
                UNION ALL
                    -- Step: move to the record that the current chain member
                    -- itself replaced (towards the original, not towards
                    -- later updates).
                    SELECT
                            parent.[COUNTER],
                            parent.REFCOUNTER
                    FROM
                            TBL_COUNTER AS parent
                            INNER JOIN n ON parent.[COUNTER] = n.REFCOUNTER
                )
            -- The chain member without a REFCOUNTER is the original record.
            SELECT @orig_counter = [COUNTER] FROM n WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL

            -- When no original was found @orig_counter is still NULL and the
            -- predicate below matches no row, so nothing is updated.
            UPDATE TEMP
               SET
                   [REFCOUNTER] = @REFCOUNTER
                  ,[PERNR] = @PERNR
                  ,[WORKDATE] = @WORKDATE
                  ,[CATSHOURS] = @CATSHOURS
                  ,[APDAT] = @APDAT
                  ,[LAETM] = @LAETM
                  ,[CATS_STATUS] = @CATS_STATUS
                  ,[APPR_STATUS] = @APPR_STATUS
             WHERE [COUNTER] = @orig_counter
        END
        ELSE
        BEGIN
            INSERT INTO TEMP
                       ([COUNTER]
                       ,[REFCOUNTER]
                       ,[PERNR]
                       ,[WORKDATE]
                       ,[CATSHOURS]
                       ,[APDAT]
                       ,[LAETM]
                       ,[CATS_STATUS]
                       ,[APPR_STATUS])
                 VALUES
                       (@COUNTER
                       ,@REFCOUNTER
                       ,@PERNR
                       ,@WORKDATE
                       ,@CATSHOURS
                       ,@APDAT
                       ,@LAETM
                       ,@CATS_STATUS
                       ,@APPR_STATUS)
        END
    END

    -- Advance to the next source row.
    FETCH NEXT FROM curs INTO
        @COUNTER,
        @REFCOUNTER,
        @PERNR,
        @WORKDATE,
        @CATSHOURS,
        @APDAT,
        @LAETM,
        @CATS_STATUS,
        @APPR_STATUS
END

-- Processing finished: close the cursor and release its resources.
CLOSE curs;
DEALLOCATE curs;

我将其缩短并创建了表格,供大家查看正在发生的事情。预期结果是

+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
|   COUNTER    |  REFCOUNTER  |  PERNR   | WORKDATE | CATSHOURS |  APDAT   | LAETM  | CATS_STATUS | APPR_STATUS |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
| 000421692670 | NULL         | 00000071 | 20190114 |      6.00 | 20190204 | 174541 |          30 |          30 |
| 000421692671 | NULL         | 00000071 | 20190114 |      3.00 | 20190204 | 174541 |          30 |          30 |
| 000421692672 | 000421692672 | 00000071 | 20190115 |      0.00 | 20190115 | 111007 |          30 |          30 |
| 000421692673 | 000421692673 | 00000071 | 20190115 |      0.00 | 20190115 | 111007 |          30 |          30 |
| 000429718015 | 000430154142 | 00000072 | 20190313 |      4.50 | 20190315 | 164659 |          30 |          30 |
| 000429718016 | NULL         | 00000072 | 20190313 |      1.50 | 20190315 | 164659 |          30 |          30 |
| 000429718017 | NULL         | 00000072 | 20190313 |       1.0 | 20190315 | 164659 |          30 |          30 |
| 000430154143 | 000430154143 | 00000072 | 20190313 |      2.50 | 20190315 | 164659 |          30 |          30 |
| 000429774620 | 000429774620 | 00000152 | 20190314 |      2.00 | 00000000 | 000000 |          60 |          20 |
| 000429774619 | 000429802105 | 00000152 | 20190314 |      5.00 | 20190315 | 143857 |          30 |          30 |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+

我需要添加到此。因此,有两个阶段。第一阶段是我将拉取2019年以来的所有数据以进行表的初始加载。然后每周一次,我将从原始来源中提取数据以获取新记录,并从上次运行以来更改记录。所以我不会每周都有完整的链条。在没有完整的数据集的情况下,需要一种方法来返回原始计数器值,这就是为什么我拥有计数器表的原因。抱歉,我不清楚。我忙于工作,还没能按照我的计划专注于此。我正在尝试所有这些不同的技术。

6 个答案:

答案 0 :(得分:1)

我相信,以下查询将帮助您入手,这是实现目标的一种非常有效的方法。

创建它是为了在中央位置维护SQL Server的历史信息,并执行以下活动,您必须在相应的脚本块中包含/替换表结构

  1. 创建temp
  2. 使用OPENQUERY(源)通过Linked Servers(链接服务器)从多台服务器收集信息,并将其加载到Temp表中。
  3. Temp表上创建索引
  4. 使用3种情况(如脚本中所述)将数据加载到中央表(目标)中

注意:根据您的情况替换了脚本


-- Stage the source rows in #SrcTemp, normalise REFCOUNTER and index the table.
CREATE TABLE #SrcTemp
                (   AENAM nvarchar(12),
                    AUTYP nvarchar(2),
                    AWART nvarchar(4),
                    BELNR nvarchar(10),
                    CATSHOURS decimal(7, 3),
                    CATSQUANTITY decimal(18, 3),
                    CHARGE_HOLD nvarchar(24),
                    [COUNTER] nvarchar(12),
                    ERNAM nvarchar(12),
                    ERSDA nvarchar(8),
                    ERSTM nvarchar(6),
                    HRCOSTASG nvarchar(1),
                    LAEDA nvarchar(8),
                    LSTAR nvarchar(6),
                    LTXA1 nvarchar(40),
                    MANDT nvarchar(3),
                    PERNR nvarchar(8),
                    RAPLZL nvarchar(8),
                    RAUFPL nvarchar(10),
                    REFCOUNTER nvarchar(12),
                    RNPLNR nvarchar(12),
                    SKOSTL nvarchar(10),
                    CATS_STATUS nvarchar(2),
                    SUPP3 nvarchar(10),
                    WORKDATE nvarchar(8),
                    ZZOH_ORDER nvarchar(24),
                    APDAT nvarchar(8),
                    APNAM nvarchar(12),
                    LAETM nvarchar(6),
                    APPR_STATUS nvarchar(2)
                );

-- Load first. The target column list is spelled out so the load does not
-- depend on the physical column order of #SrcTemp.
INSERT INTO #SrcTemp
        (
            AENAM,AUTYP,AWART,BELNR,CATSHOURS,CATSQUANTITY,CHARGE_HOLD,[COUNTER],ERNAM,ERSDA,ERSTM,HRCOSTASG,LAEDA,LSTAR,LTXA1,MANDT,
            PERNR,RAPLZL,RAUFPL,REFCOUNTER,RNPLNR,SKOSTL,CATS_STATUS,SUPP3,WORKDATE,ZZOH_ORDER,APDAT,APNAM,LAETM,APPR_STATUS
        )
SELECT
        AENAM,AUTYP,AWART,BELNR,CATSHOURS,CATSQUANTITY,CHARGE_HOLD,[COUNTER],ERNAM,ERSDA,ERSTM,HRCOSTASG,LAEDA,LSTAR,LTXA1,MANDT,
        PERNR,RAPLZL,RAUFPL,REFCOUNTER,RNPLNR,SKOSTL,CATS_STATUS,SUPP3,WORKDATE,ZZOH_ORDER,APDAT,APNAM,LAETM,APPR_STATUS
FROM
        CATSDB;

-- Mark records without a reference with '0'. This has to run after the load:
-- executed against the still-empty table it changes nothing, and the later
-- "REFCOUNTER = '0'" / "REFCOUNTER <> '0'" filters would then never match
-- the NULL values.
UPDATE #SrcTemp SET REFCOUNTER = '0' WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL;

-- Index once the data is in place and normalised.
CREATE CLUSTERED INDEX CLU_SrvTemp ON #SrcTemp ([COUNTER], REFCOUNTER);

--BEGIN
--    OPEN curs
--END

-- Scope: UNCHANGED Records ==================================================================================================================================

    -- UNCHANGED records (REFCOUNTER = '0'): abort if a COUNTER occurs more than
    -- once in the staged data, otherwise insert the records missing from TEMP
    -- and refresh the ones already present.
    IF EXISTS
        (
            SELECT  [COUNTER]
            FROM    #SrcTemp
            WHERE   REFCOUNTER = '0'
            GROUP BY [COUNTER]
            HAVING  COUNT(*) > 1
        )
        BEGIN
            RAISERROR ('Primary key violation occurred in "UNCHANGED" records processing block', 16, 1) with NOWAIT;
        END
    ELSE
    BEGIN
        -- Records not yet in the destination table: insert them.
        INSERT INTO TEMP  ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
                            ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
                            ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
                            ,[LAETM],[APPR_STATUS]
                            )
        SELECT    S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
                , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
                , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
                , S.[LAETM], S.[APPR_STATUS]
        FROM    #SrcTemp AS S
                LEFT JOIN
                    TEMP AS D ON S.[COUNTER] = D.[COUNTER]
        WHERE   S.REFCOUNTER = '0' AND D.[COUNTER] IS NULL;

        -- Records already in the destination table: overwrite the destination
        -- row (D) with the staged values (S). The update target is TEMP, not
        -- the staging table, which is discarded at the end of the script.
        UPDATE D
            SET [AENAM] = S.AENAM
                ,[AUTYP] = S.AUTYP
                ,[AWART] = S.AWART
                ,[BELNR] = S.BELNR
                ,[CATSHOURS] = S.CATSHOURS
                ,[CATSQUANTITY] = S.CATSQUANTITY
                ,[CHARGE_HOLD] = S.CHARGE_HOLD
                ,[ERNAM] = S.ERNAM
                ,[ERSDA] = S.ERSDA
                ,[ERSTM] = S.ERSTM
                ,[HRCOSTASG] = S.HRCOSTASG
                ,[LAEDA] = S.LAEDA
                ,[LSTAR] = S.LSTAR
                ,[LTXA1] = S.LTXA1
                ,[MANDT] = S.MANDT
                ,[PERNR] = S.PERNR
                ,[RAPLZL] = S.RAPLZL
                ,[RAUFPL] = S.RAUFPL
                ,[REFCOUNTER] = S.REFCOUNTER
                ,[RNPLNR] = S.RNPLNR
                ,[SKOSTL] = S.SKOSTL
                ,[CATS_STATUS] = S.CATS_STATUS
                ,[SUPP3] = S.SUPP3
                ,[WORKDATE] = S.WORKDATE
                ,[ZZOH_ORDER] = S.ZZOH_ORDER
                ,[APDAT] = S.APDAT
                ,[APNAM] = S.APNAM
                ,[LAETM] = S.LAETM
                ,[APPR_STATUS] = S.APPR_STATUS
        FROM    TEMP AS D
                INNER JOIN
                    #SrcTemp AS S ON (S.[COUNTER] = D.[COUNTER] AND S.REFCOUNTER = D.REFCOUNTER)
        WHERE   S.REFCOUNTER = '0';
    END

-- Scope: CHANGED Records ==================================================================================================================================

    -- CHANGED records (REFCOUNTER <> '0'): abort if a (COUNTER, REFCOUNTER)
    -- pair occurs more than once in the staged data, otherwise insert the
    -- records missing from TEMP and refresh the ones already present.
    IF EXISTS
        (
            SELECT  [COUNTER], REFCOUNTER
            FROM    #SrcTemp
            WHERE   REFCOUNTER <> '0'
            GROUP BY [COUNTER], REFCOUNTER
            HAVING  COUNT(*) > 1
        )
        BEGIN
            -- Severity 16, same as the UNCHANGED block; severity 10 is only
            -- an informational message and does not surface as an error.
            RAISERROR ('Primary key violation occurred in "CHANGED" records processing block', 16, 1) with NOWAIT;
        END
    ELSE
    BEGIN
        -- Changed records not yet in the destination table: insert them.
        INSERT INTO TEMP  ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
                            ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
                            ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
                            ,[LAETM],[APPR_STATUS]
                            )
        SELECT    S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
                , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
                , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
                , S.[LAETM], S.[APPR_STATUS]
        FROM    #SrcTemp AS S
                LEFT JOIN
                    TEMP AS D ON S.[COUNTER] = D.[COUNTER] AND S.REFCOUNTER = D.REFCOUNTER
        WHERE   S.REFCOUNTER <> '0' AND D.[COUNTER] IS NULL;

        -- Changed records already in the destination table: overwrite the
        -- destination row (D) with the staged values (S). The update target
        -- is TEMP, not the staging table, which is dropped below.
        UPDATE D
            SET [AENAM] = S.AENAM
                ,[AUTYP] = S.AUTYP
                ,[AWART] = S.AWART
                ,[BELNR] = S.BELNR
                ,[CATSHOURS] = S.CATSHOURS
                ,[CATSQUANTITY] = S.CATSQUANTITY
                ,[CHARGE_HOLD] = S.CHARGE_HOLD
                ,[ERNAM] = S.ERNAM
                ,[ERSDA] = S.ERSDA
                ,[ERSTM] = S.ERSTM
                ,[HRCOSTASG] = S.HRCOSTASG
                ,[LAEDA] = S.LAEDA
                ,[LSTAR] = S.LSTAR
                ,[LTXA1] = S.LTXA1
                ,[MANDT] = S.MANDT
                ,[PERNR] = S.PERNR
                ,[RAPLZL] = S.RAPLZL
                ,[RAUFPL] = S.RAUFPL
                ,[REFCOUNTER] = S.REFCOUNTER
                ,[RNPLNR] = S.RNPLNR
                ,[SKOSTL] = S.SKOSTL
                ,[CATS_STATUS] = S.CATS_STATUS
                ,[SUPP3] = S.SUPP3
                ,[WORKDATE] = S.WORKDATE
                ,[ZZOH_ORDER] = S.ZZOH_ORDER
                ,[APDAT] = S.APDAT
                ,[APNAM] = S.APNAM
                ,[LAETM] = S.LAETM
                ,[APPR_STATUS] = S.APPR_STATUS
        FROM    TEMP AS D
                INNER JOIN
                    #SrcTemp AS S ON S.[COUNTER] = D.[COUNTER] AND S.REFCOUNTER = D.REFCOUNTER
        WHERE   S.REFCOUNTER <> '0';
    END

-- The staging table is no longer needed.
DROP TABLE #SrcTemp;

答案 1 :(得分:0)

看起来可以通过简单的递归查询来完成。拥有合适的索引也很重要。

样本数据

这是示例数据在问题中的外观。只有很少的相关列。 最好包括几组变更/链,而不仅仅是一组。仅具有此样本数据将使您更难验证所提供的解决方案是否正确。

+-----------+---------------------+-----------+------------+
|   BELNR   |     CHARGE_HOLD     |  COUNTER  | REFCOUNTER |
+-----------+---------------------+-----------+------------+
| 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL       |
| 417549506 | T4-GS023-ABC2       | 420203329 | 420202428  |
| 417553156 | JGS023001    0010#* | 420206979 | 420203329  |
| 417557221 | T4-GS023-ABC2       | 420211044 | 420206979  |
| 417581675 | JGS023001    0010#* | 420235498 | 420211044  |
| 417677969 | JGS023001    0010#* | 420331792 | 420235498  |
+-----------+---------------------+-----------+------------+

查询的主要递归部分

-- Follows each change chain from its starting record (REFCOUNTER IS NULL)
-- through every record that references the previous one, carrying the
-- starting record's columns along as the "Original..." columns.
WITH chain AS
(
    -- Anchor: starting records; "Original" and "New" values are the same row.
    SELECT
        1                AS Lvl,
        root.BELNR       AS OriginalBELNR,
        root.CHARGE_HOLD AS OriginalCHARGE_HOLD,
        root.[COUNTER]   AS OriginalCOUNTER,
        root.REFCOUNTER  AS OrginalREFCOUNTER,
        root.BELNR       AS NewBELNR,
        root.CHARGE_HOLD AS NewCHARGE_HOLD,
        root.[COUNTER]   AS NewCOUNTER,
        root.REFCOUNTER  AS NewREFCOUNTER
    FROM CATSDB AS root
    WHERE root.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the record whose REFCOUNTER points at the chain's latest COUNTER.
    SELECT
        prev.Lvl + 1     AS Lvl,
        prev.OriginalBELNR,
        prev.OriginalCHARGE_HOLD,
        prev.OriginalCOUNTER,
        prev.OrginalREFCOUNTER,
        nxt.BELNR        AS NewBELNR,
        nxt.CHARGE_HOLD  AS NewCHARGE_HOLD,
        nxt.[COUNTER]    AS NewCOUNTER,
        nxt.REFCOUNTER   AS NewREFCOUNTER
    FROM chain AS prev
    INNER JOIN CATSDB AS nxt
        ON nxt.REFCOUNTER = prev.NewCOUNTER
)
SELECT
    Lvl,
    OriginalBELNR,
    OriginalCHARGE_HOLD,
    OriginalCOUNTER,
    OrginalREFCOUNTER,
    NewBELNR,
    NewCHARGE_HOLD,
    NewCOUNTER,
    NewREFCOUNTER
FROM chain;

中间结果

+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR  |   NewCHARGE_HOLD    | NewCOUNTER | NewREFCOUNTER |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
|   1 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417548605 | T4-GS023ABC2 0150#* |  420202428 | NULL          |
|   2 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417549506 | T4-GS023-ABC2       |  420203329 | 420202428     |
|   3 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417553156 | JGS023001    0010#* |  420206979 | 420203329     |
|   4 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417557221 | T4-GS023-ABC2       |  420211044 | 420206979     |
|   5 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417581675 | JGS023001    0010#* |  420235498 | 420211044     |
|   6 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417677969 | JGS023001    0010#* |  420331792 | 420235498     |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+

您可以看到我们已经走到了链的开始行(RefCounter is NULL),并将其带到了整个变更链中。

现在,我们只需要选择最后一次更改的行,即每个开始行的Lvl最大即可。一种实现方法是在适当的分区上使用ROW_NUMBER函数。

最终查询

-- Returns, for every starting record (REFCOUNTER IS NULL), the last record
-- of its change chain together with the starting record's own columns.
WITH chain AS
(
    -- Anchor: starting records; "Original" and "New" values are the same row.
    SELECT
        1                AS Lvl,
        root.BELNR       AS OriginalBELNR,
        root.CHARGE_HOLD AS OriginalCHARGE_HOLD,
        root.[COUNTER]   AS OriginalCOUNTER,
        root.REFCOUNTER  AS OrginalREFCOUNTER,
        root.BELNR       AS NewBELNR,
        root.CHARGE_HOLD AS NewCHARGE_HOLD,
        root.[COUNTER]   AS NewCOUNTER,
        root.REFCOUNTER  AS NewREFCOUNTER
    FROM CATSDB AS root
    WHERE root.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the record whose REFCOUNTER points at the chain's latest COUNTER.
    SELECT
        prev.Lvl + 1     AS Lvl,
        prev.OriginalBELNR,
        prev.OriginalCHARGE_HOLD,
        prev.OriginalCOUNTER,
        prev.OrginalREFCOUNTER,
        nxt.BELNR        AS NewBELNR,
        nxt.CHARGE_HOLD  AS NewCHARGE_HOLD,
        nxt.[COUNTER]    AS NewCOUNTER,
        nxt.REFCOUNTER   AS NewREFCOUNTER
    FROM chain AS prev
    INNER JOIN CATSDB AS nxt
        ON nxt.REFCOUNTER = prev.NewCOUNTER
),
ranked AS
(
    -- rn = 1 marks the deepest (highest Lvl) row of each chain.
    SELECT
        Lvl,
        OriginalBELNR,
        OriginalCHARGE_HOLD,
        OriginalCOUNTER,
        OrginalREFCOUNTER,
        NewBELNR,
        NewCHARGE_HOLD,
        NewCOUNTER,
        NewREFCOUNTER,
        ROW_NUMBER() OVER (PARTITION BY OriginalCOUNTER ORDER BY Lvl DESC) AS rn
    FROM chain
)
SELECT
    Lvl,
    OriginalBELNR,
    OriginalCHARGE_HOLD,
    OriginalCOUNTER,
    OrginalREFCOUNTER,
    NewBELNR,
    NewCHARGE_HOLD,
    NewCOUNTER,
    NewREFCOUNTER,
    rn
FROM ranked
WHERE rn = 1
--OPTION (MAXRECURSION 0)
;

如果链长于100,则应在查询中添加OPTION (MAXRECURSION 0),因为默认情况下,SQL Server将递归深度限制为100。

结果

+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR  |   NewCHARGE_HOLD    | NewCOUNTER | NewREFCOUNTER | rn |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
|   6 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417677969 | JGS023001    0010#* |  420331792 |     420235498 |  1 |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+

效率

要使其高效运行,我们需要在REFCOUNTER列上有一个索引。此外,查询假定REFCOUNTER为NULL,而不是''。如果混合使用NULL和空字符串,请统一数据,否则索引将无用。这个索引是最低限度的要求。

理想情况下,REFCOUNTER列上应该有一个CLUSTERED索引,因为查询总是从表中选择所有列。

-- Cluster CATSDB on REFCOUNTER, the column the recursive join looks rows up by.
CREATE CLUSTERED INDEX [IX_RefCounter]
    ON [dbo].[CATSDB] ([REFCOUNTER])

如果您无法更改原始表的索引,我建议将所有数百万行复制到临时表中,并为该临时表创建此聚集索引。

我有了这个聚集索引的不错的计划。

plan

答案 2 :(得分:0)

您可以采取一些措施来提高性能:

从nvarchar将COUNTER和REFCOUNTER转换为数据类型int,对int的操作比字符快得多。 不要使用游标,您仍然可以使用while循环一次处理一条记录。

-- Cursor-free row-by-row loop: visits the CATSDB rows in ascending COUNTER
-- order, one per iteration.
-- NOTE(review): assumes COUNTER has been converted to int and that every
-- COUNTER value is greater than 0 -- confirm against the real table.
DECLARE @CCOUNTER int = 0       -- COUNTER of the row processed last
DECLARE @next_counter int       -- COUNTER of the row to process next

WHILE (1 = 1)
BEGIN
    -- Smallest COUNTER not processed yet. MIN() always yields one row, so the
    -- end of the data is signalled by a NULL result, not by @@ROWCOUNT.
    SELECT @next_counter = MIN([COUNTER]) FROM CATSDB WHERE [COUNTER] > @CCOUNTER

    IF @next_counter IS NULL
        BREAK   -- no rows left, we are done

    SET @CCOUNTER = @next_counter

    /* SELECT RECORD FOR THIS @CCOUNTER FROM CATSDB */
    /* DO THE PROCESSING FOR THIS RECORD */
END

答案 3 :(得分:0)

有一种叫做SQL批量复制(bulk copy)的方法,我没有用过,但您可以尝试一下。

答案 4 :(得分:0)

最有效的方法是通过BCP。 https://docs.microsoft.com/en-us/sql/tools/bcp-utility?view=sql-server-2017

您可以将所有数据BCP放入SQL Server的登台表中,然后运行插入和更新。同样,在检查记录是否不存在以确定它是插入还是更新“如果不存在(SELECT * FROM TEMP WHERE [COUNTER] = @COUNTER)”时,这非常昂贵。

更高效的方法示例(涉及的表名称:TBL_SOURCE、TBL_DESTINATION、#TBL_UPDATES、#TBL_INSERTS):

-- Source rows whose COUNTER is not in the destination yet: rows to insert.
-- Only the source columns (S.*) are selected: "SELECT *" over the join would
-- return COUNTER from both tables, and SELECT ... INTO cannot create a table
-- with duplicate column names.
SELECT S.*
INTO #TBL_INSERTS
FROM TBL_SOURCE AS S
WHERE NOT EXISTS (SELECT 1 FROM TBL_DESTINATION AS D WHERE D.COUNTER = S.COUNTER)

-- Source rows whose COUNTER already exists in the destination: rows to update.
SELECT S.*
INTO #TBL_UPDATES
FROM TBL_SOURCE AS S
WHERE EXISTS (SELECT 1 FROM TBL_DESTINATION AS D WHERE D.COUNTER = S.COUNTER)

需要更新的记录会被捕获到#TBL_UPDATES中,需要插入的记录会被捕获到#TBL_INSERTS中。

答案 5 :(得分:0)

请参阅基于少量样本数据和给定输出的脚本,我们的脚本无法100%确定并优化,其中要更新数百万个数据。

我对自己的脚本充满信心,在完全理解需求之后可以朝着这个方向改进。

首先,我想知道为什么数据类型为nvarchar,如果可能的话请设为varchar,int,datetime

如果您可以更改数据类型,那么性能会有明显提升。

另外,表中没有标识列(identity);应该添加一个标识列并将其作为聚集索引。

从性能的角度来看,这两点很重要。

在我的示例中,

-- CATSDB with an identity surrogate key as the clustered primary key.
CREATE TABLE CATSDB
(
    id          int identity CONSTRAINT PK_CATSDB_ID PRIMARY KEY CLUSTERED,
    [COUNTER]   nvarchar(12),
    REFCOUNTER  nvarchar(12),
    PERNR       nvarchar(8),
    WORKDATE    nvarchar(8),
    CATSHOURS   decimal(7, 3),
    APDAT       nvarchar(8),
    LAETM       nvarchar(6),
    CATS_STATUS nvarchar(2),
    APPR_STATUS nvarchar(2)
)

-- Nonclustered index on (REFCOUNTER, COUNTER), the columns the recursive
-- query joins on.
CREATE NONCLUSTERED INDEX FICATSDB_REFCOUNTER
    ON CATSDB (REFCOUNTER, [COUNTER]);




-- Work table: one row per CATSDB row (UpdateID) holding the values of the
-- latest record found in that row's change chain (FINDID).
IF OBJECT_ID('tempdb..#TEMP', 'U') IS NOT NULL
    DROP TABLE #TEMP;

CREATE TABLE #TEMP
(
 -- Primary key is UpdateID, not FINDID: every member of a change chain maps
 -- to the same latest record, so FINDID repeats, while UpdateID comes from a
 -- GROUP BY id and is unique.
 UpdateID      INT NOT NULL
 PRIMARY KEY,
 FINDID        INT,
 [COUNTER]     [NVARCHAR](12) NOT NULL,
 [REFCOUNTER]  [NVARCHAR](12) NULL,
 [PERNR]       [NVARCHAR](8) NULL,
 [WORKDATE]    [NVARCHAR](8) NULL,
 [CATSHOURS]   [DECIMAL](7, 3) NULL,
 [APDAT]       [NVARCHAR](8) NULL,
 [LAETM]       [NVARCHAR](6) NULL,
 [CATS_STATUS] [NVARCHAR](2) NULL,
 [APPR_STATUS] [NVARCHAR](2) NULL
);

-- For every CATSDB row, find the highest-id row reachable by following
-- REFCOUNTER links forward, and store that row's values in #TEMP.
WITH links AS
(
    -- Anchor: every row starts a chain that initially points at itself.
    SELECT src.id,
           src.[COUNTER],
           src.REFCOUNTER,
           src.id AS Findid
    FROM dbo.CATSDB AS src

    UNION ALL

    -- Step: rows referencing the chain's current COUNTER; the starting id is
    -- kept, and only rows with an id not lower than the current one are followed.
    SELECT cur.id,
           nxt.[COUNTER],
           nxt.REFCOUNTER,
           nxt.id
    FROM links AS cur
         INNER JOIN dbo.CATSDB AS nxt
             ON nxt.REFCOUNTER = cur.[COUNTER]
            AND nxt.id >= cur.Findid
),
latest AS
(
    -- Highest id reached from each starting row.
    SELECT id,
           MAX(Findid) AS Findid
    FROM links
    GROUP BY id
)
INSERT INTO #TEMP
    (UpdateID, FINDID, [COUNTER], [REFCOUNTER], [PERNR], [WORKDATE],
     [CATSHOURS], [APDAT], [LAETM], [CATS_STATUS], [APPR_STATUS])
SELECT l.id,
       l.Findid,
       c.[COUNTER],
       c.REFCOUNTER,
       c.PERNR,
       c.WORKDATE,
       c.CATSHOURS,
       c.APDAT,
       c.LAETM,
       c.CATS_STATUS,
       c.APPR_STATUS
FROM latest AS l
     INNER JOIN dbo.CATSDB AS c
         ON c.id = l.Findid;

-- Applies the values collected in #TEMP to CATSDB inside a transaction.
-- As written this is a dry run: the transaction is rolled back after the
-- verification query; replace the ROLLBACK in the TRY block with COMMIT to
-- keep the changes.
BEGIN TRY
    BEGIN TRAN;

    -- Overwrite each CATSDB row with the values of the row found for it.
    UPDATE A
      SET
          [REFCOUNTER] = b.REFCOUNTER,
          [PERNR] = b.PERNR,
          [WORKDATE] = b.WORKDATE,
          [CATSHOURS] = b.CATSHOURS,
          [APDAT] = b.APDAT,
          [LAETM] = b.LAETM,
          [CATS_STATUS] = b.CATS_STATUS,
          [APPR_STATUS] = b.APPR_STATUS
    FROM CATSDB A
         INNER JOIN #TEMP B ON a.id = b.UpdateID;

    -- this is only test query
    SELECT c1.UpdateID AS UpdateID,
           a.*
    FROM dbo.CATSDB A
         INNER JOIN #TEMP c1 ON a.id = c1.Findid;

    IF(@@trancount > 0)
        ROLLBACK; -- commit
END TRY
BEGIN CATCH
    IF(@@trancount > 0)
        ROLLBACK;

    -- Re-raise the original error; without this a failed update would be
    -- rolled back silently and the script would appear to have succeeded.
    THROW;
END CATCH;

#Temp should be permanent table.

IMO,您的表急需使用Identity列,该列应为Identity和Clustered Index。

您可以尝试,也可以对其进行更改。

REFCOUNTER,COUNTER应该是非聚簇索引。

只有在优化查询之后并且在索引上方具有正确的PLAN后,才能提高性能。

正确的方案取决于:是应该用递归或RBAR(逐行处理)一次性更新数百万条记录,还是应该分批更新(Batch update)?

您可以首先使用Rollback测试具有数百万行的脚本。