删除匹配集

时间:2018-07-31 23:43:17

标签: sql sql-server tsql sql-server-2014

这是我的架构和数据的示例:

declare @temp table (rowid int identity(1,1), groupNumber int, typeName varchar(10), valueA int, valueB int, effectiveDate datetime, expiredDate datetime)

insert @temp values (234545, 'Upper', 1, 1000, '1/1/18 11:31:00', '2/1/18 22:01:00')
insert @temp values (234545, 'Lower', 2, 0, '1/1/18 11:31:00', '2/1/18 22:01:00')
insert @temp values (234545, 'Upper', 1, 1000, '2/1/18 22:01:00', '4/15/18 05:39:00')
insert @temp values (234545, 'Lower', 2, 0, '2/1/18 22:01:00', '4/15/18 05:39:00')
insert @temp values (234545, 'Upper', 1, 900, '4/15/18 05:39:00', '6/1/18 10:32:00')
insert @temp values (234545, 'Lower', 2, 0, '4/15/18 05:39:00', '6/1/18 10:32:00')
insert @temp values (234545, 'Upper', 1, 900, '4/15/18 06:39:00', '6/1/18 10:32:00')
insert @temp values (234545, 'Lower', 2, 0, '4/15/18 06:39:00', '6/1/18 10:32:00')
insert @temp values (234545, 'Upper', 1, 800, '6/1/18 10:32:00', null)
insert @temp values (234545, 'Lower', 2, 0, '6/1/18 10:32:00', null)

select *
from @temp

对于每个组号,有多个集合,一个集合由生效日期定义,并且生效日期始终等于上一个集合的到期日期。因此,在示例中有5套,我想做的是删除多余的套,它们将是3/4和5/6行。我只关心valueA或valueB更改时的新集合。最终,我的目标是通过一次循环遍历各组来清理生产数据库中的数据,因为大约60%的行在任何重要方面(例如,typeName,valueA)都完全没有变化。 ,valueB)。

要注意的是,当我删除这4行时,我还需要将前两行的expiredDate设置为等于第7和8行的有效日期,因为这些行总是需要排队。

另一个问题是,我想运行一个脚本(可能通过SQL Agent作业),该脚本遍历表(在生产环境中不是表变量),并删除行并使用新的事务来更新expiredDates每个组号。如果我在完成工作之前停止了工作,并且该事务处于事务处理的中间(很可能),是否可以自动回退该工作?

3 个答案:

答案 0 :(得分:1)

您可以使用Windows函数生成两个等级(一个升序,另一个降序)来标识具有较高和较低值的行。

然后,您只能过滤排名第一的值:

;WITH summary AS (
    SELECT  p.rowid 
           ,p.groupNumber 
           ,p.typeName 
           ,p.valueA 
           ,p.valueB 
           ,p.effectiveDate 
           ,p.expiredDate 
           ,rank() OVER(PARTITION BY p.groupNumber ORDER BY p.effectiveDate ) AS rk_min
           ,rank() OVER(PARTITION BY p.groupNumber ORDER BY p.effectiveDate desc) AS rk_max
      FROM @temp p)
SELECT s.rowid, s. groupNumber, s.typeName, s.valueA, s.valueB,
       s.effectiveDate, s.expiredDate 
FROM summary s
WHERE s.rk_min = 1 or s.rk_max=1
ORDER BY s.rowid

结果:

enter image description here

如果要确定极端之间的内部值,只需将where条件更改为WHERE s.rk_min > 1 and s.rk_max > 1

;WITH summary AS (
    SELECT  p.rowid 
           ,p.groupNumber 
           ,p.typeName 
           ,p.valueA 
           ,p.valueB 
           ,p.effectiveDate 
           ,p.expiredDate 
           ,rank() OVER(PARTITION BY p.groupNumber ORDER BY p.effectiveDate ) AS rk_min
           ,rank() OVER(PARTITION BY p.groupNumber ORDER BY p.effectiveDate desc) AS rk_max
      FROM @temp p)
SELECT s.rowid, s. groupNumber, s.typeName, s.valueA, s.valueB, 
       s.effectiveDate, s.expiredDate 
FROM summary s
WHERE s.rk_min > 1 and s.rk_max > 1
ORDER BY s.rowid

结果:

enter image description here

答案 1 :(得分:1)

这不是最终脚本,因为存在一些疑问。

@问题1:什么是多余的行/集?为什么3/4和5/6行是多余的?答案应涵盖所有可能的情况。

@Doubt 2:前2行expiredDate将更新为最后2行中的哪一行的expiredDate。更新时前2行和后2行之间是什么关系?

CREATE table #temp (rowid int identity(1,1), groupNumber int, typeName varchar(10), valueA int, valueB int
, effectiveDate datetime, expiredDate datetime,isLineup int default(0))

insert #temp values (234545, 'Upper', 1, 1000, '1/1/18 11:31:00', '2/1/18 22:01:00',0)
insert #temp values (234545, 'Lower', 2, 0, '1/1/18 11:31:00', '2/1/18 22:01:00',0)
insert #temp values (234545, 'Upper', 1, 1000, '2/1/18 22:01:00', '4/15/18 05:39:00',0)
insert #temp values (234545, 'Lower', 2, 0, '2/1/18 22:01:00', '4/15/18 05:39:00',0)
insert #temp values (234545, 'Upper', 1, 900, '4/15/18 05:39:00', '6/1/18 10:32:00',0)
insert #temp values (234545, 'Lower', 2, 0, '4/15/18 05:39:00', '6/1/18 10:32:00',0)
insert #temp values (234545, 'Upper', 1, 900, '6/1/18 10:32:00', null,0)
insert #temp values (234545, 'Lower', 2, 0, '6/1/18 10:32:00', null,0)



CREATE table #temp1 (rowid int,effectiveDate datetime,Flag int )
--select * from #temp

-- Main Script

Begin Try
BEGIN TRANSACTION

-- Criteria to decide superflous rows
insert into #temp1 (rowid ,effectiveDate ,Flag  )
select top 2 rowid,effectiveDate,0 Flag from #temp where isLineup=0 ORDER by rowid
insert into #temp1 (rowid ,effectiveDate ,Flag  )
select top 2 rowid,effectiveDate,1 Flag from #temp where isLineup=0 ORDER by rowid desc
--- End

delete FROM #temp 
where not EXISTS(select 1 from #temp1 c where c.rowid=#temp.rowid )

update C 
set expiredDate=ca.effectiveDate
,isLineup=1
from #temp c
cross apply(select top 1 effectiveDate from #temp1 c1 where c1.Flag=1 )ca
where c.isLineup=0

COMMIT

End Try
begin Catch

if (@@trancount>0)
ROLLBACK TRAN

-- log error

end Catch

-- End Main

select * from #temp
select * from #temp1

drop TABLE #temp
drop table #temp1

答案 2 :(得分:1)

我想出了答案:

declare @temp table (rowid int identity(1,1), groupNumber int, typeName varchar(10), valueA int, valueB int, effectiveDate datetime, expiredDate datetime)

insert @temp values (234545, 'Upper', 1, 1000, '1/1/18 11:31:00', '2/1/18 22:01:00')
insert @temp values (234545, 'Lower', 2, 0, '1/1/18 11:31:00', '2/1/18 22:01:00')
insert @temp values (234545, 'Upper', 1, 1000, '2/1/18 22:01:00', '4/15/18 05:39:00')
insert @temp values (234545, 'Lower', 2, 0, '2/1/18 22:01:00', '4/15/18 05:39:00')
insert @temp values (234545, 'Upper', 1, 900, '4/15/18 05:39:00', '6/1/18 10:32:00')
insert @temp values (234545, 'Lower', 2, 0, '4/15/18 05:39:00', '6/1/18 10:32:00')
insert @temp values (234545, 'Upper', 1, 900, '4/15/18 06:39:00', '6/1/18 10:32:00')
insert @temp values (234545, 'Lower', 2, 0, '4/15/18 06:39:00', '6/1/18 10:32:00')
insert @temp values (234545, 'Upper', 1, 800, '6/1/18 10:32:00', null)
insert @temp values (234545, 'Lower', 2, 0, '6/1/18 10:32:00', null)

select * from @temp

DECLARE MY_CURSOR Cursor STATIC 
FOR SELECT DISTINCT groupNumber FROM @temp 

Open My_Cursor 
DECLARE @groupNumber int
Fetch NEXT FROM MY_Cursor INTO @groupNumber
While (@@FETCH_STATUS <> -1)
BEGIN
            IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp  

            SELECT RANK() OVER (PARTITION BY rp2.groupNumber ORDER BY rp2.EffectiveDate) AS TheRank, rp2.groupNumber, rp2.EffectiveDate,
                    TotalvalueA = SUM(rp2.valueA), ChecksumTotal = SUM(ISNULL(rp2.valueA,0) + ISNULL(rp2.valueB,0)), --assumes valueA and valueB can never be reversed
                                    (
                                        Select CAST(rp.typeName as varchar(2)) + ',' AS [text()]
                                        From @temp rp
                                        Where rp.groupNumber = rp2.groupNumber AND rp.groupNumber = @groupNumber
                                            and rp.EffectiveDate = rp2.EffectiveDate
                                        GROUP BY rp.typeName
                                        ORDER BY MIN(rp.typeName)                    
                                        For XML PATH ('')
                                    ) typesXML,
                    DeleteSet = 0
            INTO #temp
            FROM @temp rp2
            WHERE rp2.groupNumber = @groupNumber
            GROUP BY rp2.groupNumber, rp2.EffectiveDate

            UPDATE t2
            SET DeleteSet = 1
            From #temp t1
            LEFT JOIN #temp t2 ON t1.TheRank = t2.TheRank - 1
            WHERE t1.TotalvalueA = t2.TotalvalueA AND t1.ChecksumTotal = t2.ChecksumTotal AND t1.typesXML = t2.typesXML
            AND t2.TheRank <> (SELECT MAX(TheRank) FROM #temp)

            BEGIN TRAN

            DELETE rp
            FROM @temp rp 
            JOIN #temp t ON t.groupNumber = rp.groupNumber AND rp.EffectiveDate = t.EffectiveDate AND t.DeleteSet = 1

            if @@error != 0 raiserror('Script failed', 20, -1) with log

            UPDATE rp
            SET ExpiredDate = t2.NewExpiredDate
            FROM @temp rp
            JOIN (SELECT * , NewExpiredDate = LEAD(EffectiveDate) OVER (ORDER BY TheRank) FROM #temp WHERE DeleteSet = 0) t2 ON t2.groupNumber = rp.groupNumber AND rp.EffectiveDate = t2.EffectiveDate
            JOIN #temp t ON t.TheRank = t2.TheRank + 1
            WHERE rp.groupNumber = @groupNumber AND t2.NewExpiredDate IS NOT NULL AND rp.ExpiredDate <> t2.NewExpiredDate
            AND t.DeleteSet = 1

            if @@error != 0 raiserror('Script failed', 20, -1) with log

            PRINT 'No Errors ... Committing changes for ' + CAST(@groupNumber as varchar(15))
            COMMIT
            --select * from @temp
            --ROLLBACK
            --dbcc opentran
    WAITFOR DELAY '00:00:00:005';
    FETCH NEXT FROM MY_CURSOR INTO @groupNumber
END
CLOSE MY_CURSOR
DEALLOCATE MY_CURSOR
GO