这是我的架构和数据的示例:
-- Sample schema and data for the question: a single group (234545) whose rows
-- come in pairs ('Upper' / 'Lower') sharing the same effectiveDate/expiredDate.
-- Datetime literals use the ISO 8601 form (yyyy-mm-ddThh:mi:ss), which SQL Server
-- parses the same way regardless of SET LANGUAGE / SET DATEFORMAT. The original
-- m/d/yy strings raise a conversion error ('4/15/18') or silently swap day and
-- month ('2/1/18') on sessions that use a dmy date format.
DECLARE @temp TABLE (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime   -- NULL marks the currently open set
);

-- One INSERT per row, in order, so rowid values 1..10 follow the listing below.
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 800,  '2018-06-01T10:32:00', NULL);
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-06-01T10:32:00', NULL);

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate
FROM @temp
ORDER BY rowid;
对于每个组号,有多个集合;一个集合由生效日期(effectiveDate)定义,并且其生效日期始终等于上一个集合的到期日期(expiredDate)。因此,在示例中有 5 个集合,我想做的是删除多余的集合,即第 3/4 行和第 5/6 行。我只关心 valueA 或 valueB 发生变化时的新集合。最终,我的目标是一次一个组地循环遍历各组,来清理生产数据库中的数据,因为大约 60% 的行在任何重要方面(例如 typeName、valueA、valueB)都完全没有变化。
需要注意的是,当我删除这 4 行时,还需要将前两行的 expiredDate 设置为第 7 和第 8 行的生效日期(effectiveDate),因为相邻集合的日期必须始终首尾衔接。
另一个问题是,我想运行一个脚本(可能通过 SQL Agent 作业),该脚本遍历该表(在生产环境中它不是表变量),并为每个组号使用一个新的事务来删除行并更新 expiredDate。如果我在作业完成之前将其停止,而此时它正处于某个事务的中间(很可能如此),该事务会自动回滚吗?
答案 0 :(得分:1)
您可以使用窗口函数生成两个排名(一个按 effectiveDate 升序,另一个按降序),以标识每个组中 effectiveDate 最小和最大的行。
然后,只需筛选出任一排名为 1 的行:
-- Return the rows that belong to the earliest or the latest effectiveDate of
-- each groupNumber. RANK() gives every row of a tied date the same rank, so all
-- rows of the first and the last set are returned.
;WITH ranked AS (
    SELECT
        src.rowid,
        src.groupNumber,
        src.typeName,
        src.valueA,
        src.valueB,
        src.effectiveDate,
        src.expiredDate,
        RANK() OVER (PARTITION BY src.groupNumber ORDER BY src.effectiveDate ASC)  AS rk_min,
        RANK() OVER (PARTITION BY src.groupNumber ORDER BY src.effectiveDate DESC) AS rk_max
    FROM @temp AS src
)
SELECT
    r.rowid,
    r.groupNumber,
    r.typeName,
    r.valueA,
    r.valueB,
    r.effectiveDate,
    r.expiredDate
FROM ranked AS r
WHERE 1 IN (r.rk_min, r.rk_max)   -- first set or last set of the group
ORDER BY r.rowid;
结果:
如果要找出两个极端之间的内部行,只需将 WHERE 条件更改为 `WHERE s.rk_min > 1 and s.rk_max > 1`:
-- Return the interior rows of each groupNumber: every row whose effectiveDate is
-- neither the earliest nor the latest one of its group.
SELECT
    r.rowid,
    r.groupNumber,
    r.typeName,
    r.valueA,
    r.valueB,
    r.effectiveDate,
    r.expiredDate
FROM (
    SELECT
        src.rowid,
        src.groupNumber,
        src.typeName,
        src.valueA,
        src.valueB,
        src.effectiveDate,
        src.expiredDate,
        RANK() OVER (PARTITION BY src.groupNumber ORDER BY src.effectiveDate ASC)  AS rk_min,
        RANK() OVER (PARTITION BY src.groupNumber ORDER BY src.effectiveDate DESC) AS rk_max
    FROM @temp AS src
) AS r
WHERE r.rk_max > 1   -- not in the last set
  AND r.rk_min > 1   -- not in the first set
ORDER BY r.rowid;
结果:
答案 1 :(得分:1)
这不是最终脚本,因为存在一些疑问。
疑问 1:什么是多余的行/集合?为什么第 3/4 行和第 5/6 行是多余的?答案应涵盖所有可能的情况。
疑问 2:前 2 行的 expiredDate 应更新为最后 2 行中哪一行的日期?更新时,前 2 行和后 2 行之间是什么关系?
-- Draft clean-up script (the answer itself states it is not final).
-- It keeps the first two and the last two rows of #temp (by rowid), deletes
-- everything in between, and closes the first pair's expiredDate against the
-- effectiveDate of the last pair.
--
-- Changes compared with the original draft:
--   * datetime literals are ISO 8601, so they do not depend on SET DATEFORMAT;
--   * INSERTs name their target columns;
--   * TOP (1) has an ORDER BY, so the chosen effectiveDate is deterministic;
--   * the last pair keeps its own expiredDate (NULL = still open) instead of
--     being overwritten with its own effectiveDate;
--   * the CATCH block reports the error instead of swallowing it.
CREATE TABLE #temp (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime,
    isLineup      int DEFAULT (0)   -- 0 = not processed yet, 1 = processed by the main script
);

INSERT #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00', 0);
INSERT #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0,    '2018-01-01T11:31:00', '2018-02-01T22:01:00', 0);
INSERT #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00', 0);
INSERT #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0,    '2018-02-01T22:01:00', '2018-04-15T05:39:00', 0);
INSERT #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T05:39:00', '2018-06-01T10:32:00', 0);
INSERT #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T05:39:00', '2018-06-01T10:32:00', 0);
INSERT #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 900,  '2018-06-01T10:32:00', NULL, 0);
INSERT #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0,    '2018-06-01T10:32:00', NULL, 0);

-- Rows to keep. Flag 0 = the first two rows, Flag 1 = the last two rows.
CREATE TABLE #temp1 (
    rowid         int,
    effectiveDate datetime,
    Flag          int
);

-- Main script
BEGIN TRY
    BEGIN TRANSACTION;

    -- Criteria deciding which rows are NOT superfluous: first two rows ...
    INSERT INTO #temp1 (rowid, effectiveDate, Flag)
    SELECT TOP (2) rowid, effectiveDate, 0 AS Flag
    FROM #temp
    WHERE isLineup = 0
    ORDER BY rowid;

    -- ... and last two rows.
    INSERT INTO #temp1 (rowid, effectiveDate, Flag)
    SELECT TOP (2) rowid, effectiveDate, 1 AS Flag
    FROM #temp
    WHERE isLineup = 0
    ORDER BY rowid DESC;

    -- Everything that was not selected above is treated as superfluous.
    DELETE FROM #temp
    WHERE NOT EXISTS (SELECT 1 FROM #temp1 AS k WHERE k.rowid = #temp.rowid);

    -- Close the surviving first pair against the last pair's effectiveDate and
    -- mark every surviving row as processed. Rows that belong to the last pair
    -- keep their current expiredDate.
    UPDATE c
    SET expiredDate = CASE
                          WHEN EXISTS (SELECT 1
                                       FROM #temp1 AS k
                                       WHERE k.rowid = c.rowid
                                         AND k.Flag = 1)
                              THEN c.expiredDate
                          ELSE ca.effectiveDate
                      END,
        isLineup = 1
    FROM #temp AS c
    CROSS APPLY (
        SELECT TOP (1) c1.effectiveDate
        FROM #temp1 AS c1
        WHERE c1.Flag = 1
        ORDER BY c1.rowid          -- fixed tie-break so TOP (1) is deterministic
    ) AS ca
    WHERE c.isLineup = 0;

    COMMIT TRANSACTION;
END TRY
BEGIN CATCH
    IF (@@TRANCOUNT > 0)
        ROLLBACK TRANSACTION;

    -- Report the failure to the caller; severity 16 does not end the batch, so
    -- the result sets and the temp-table clean-up below still run.
    DECLARE @errorMessage nvarchar(2048) = ERROR_MESSAGE();
    RAISERROR('%s', 16, 1, @errorMessage);
END CATCH;
-- End main script

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup
FROM #temp
ORDER BY rowid;

SELECT rowid, effectiveDate, Flag
FROM #temp1
ORDER BY rowid;

DROP TABLE #temp;
DROP TABLE #temp1;
答案 2 :(得分:1)
我想出了答案:
-- Sample schema and data used by the cursor script that follows: a single group
-- (234545) whose rows come in 'Upper' / 'Lower' pairs sharing one effectiveDate.
-- Datetime literals use the ISO 8601 form (yyyy-mm-ddThh:mi:ss), which SQL Server
-- parses the same way regardless of SET LANGUAGE / SET DATEFORMAT. The original
-- m/d/yy strings fail ('4/15/18') or silently swap day and month ('2/1/18') on
-- sessions that use a dmy date format.
DECLARE @temp TABLE (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime   -- NULL marks the currently open set
);

-- One INSERT per row, in order, so rowid values 1..10 follow the listing below.
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 800,  '2018-06-01T10:32:00', NULL);
INSERT @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-06-01T10:32:00', NULL);

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate
FROM @temp
ORDER BY rowid;
-- For every groupNumber in @temp, remove effective-date sets that repeat the
-- previous set (same valueA total, same valueA+valueB total, same list of type
-- names) and stretch the expiredDate of the surviving set so that consecutive
-- sets still line up. The last set of a group is never removed.
-- Each group is processed in its own transaction.
--
-- Error handling: XACT_ABORT plus TRY/CATCH rolls back the open transaction on
-- any run-time error and re-raises it, replacing the original @@error checks
-- with a severity-20 RAISERROR ... WITH LOG (which needs elevated permission
-- and terminates the connection).
-- NOTE(review): table variables are not affected by ROLLBACK; the rollback only
-- protects data once @temp is replaced by the real production table.
SET XACT_ABORT ON;

DECLARE @groupNumber int;

-- STATIC: iterate over a snapshot of the group list, unaffected by the deletes below.
DECLARE MY_CURSOR CURSOR LOCAL STATIC
FOR SELECT DISTINCT groupNumber FROM @temp;

OPEN MY_CURSOR;

BEGIN TRY
    FETCH NEXT FROM MY_CURSOR INTO @groupNumber;

    WHILE (@@FETCH_STATUS = 0)
    BEGIN
        IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp;

        -- One row per effective-date set of the current group, with the
        -- fingerprint columns used to compare a set with its predecessor.
        SELECT
            RANK() OVER (PARTITION BY rp2.groupNumber ORDER BY rp2.effectiveDate) AS TheRank,
            rp2.groupNumber,
            rp2.effectiveDate,
            TotalvalueA = SUM(rp2.valueA),
            ChecksumTotal = SUM(ISNULL(rp2.valueA, 0) + ISNULL(rp2.valueB, 0)), --assumes valueA and valueB can never be reversed
            (
                -- Comma-separated, sorted list of the set's type names. The
                -- full name is used: truncating it could make two different
                -- type names look identical.
                SELECT rp.typeName + ',' AS [text()]
                FROM @temp rp
                WHERE rp.groupNumber = rp2.groupNumber
                  AND rp.groupNumber = @groupNumber
                  AND rp.effectiveDate = rp2.effectiveDate
                GROUP BY rp.typeName
                ORDER BY MIN(rp.typeName)
                FOR XML PATH ('')
            ) AS typesXML,
            DeleteSet = 0
        INTO #temp
        FROM @temp rp2
        WHERE rp2.groupNumber = @groupNumber
        GROUP BY rp2.groupNumber, rp2.effectiveDate;

        -- Flag a set for deletion when it matches the set immediately before
        -- it; the last set of the group is always kept.
        UPDATE t2
        SET DeleteSet = 1
        FROM #temp t1
        INNER JOIN #temp t2
            ON t1.TheRank = t2.TheRank - 1
        WHERE t1.TotalvalueA = t2.TotalvalueA
          AND t1.ChecksumTotal = t2.ChecksumTotal
          AND t1.typesXML = t2.typesXML
          AND t2.TheRank <> (SELECT MAX(TheRank) FROM #temp);

        BEGIN TRAN;

        -- Remove every row of the flagged sets.
        DELETE rp
        FROM @temp rp
        INNER JOIN #temp t
            ON t.groupNumber = rp.groupNumber
           AND rp.effectiveDate = t.effectiveDate
           AND t.DeleteSet = 1;

        -- A surviving set directly followed by a deleted set now expires when
        -- the next surviving set becomes effective.
        UPDATE rp
        SET expiredDate = t2.NewExpiredDate
        FROM @temp rp
        INNER JOIN (
            SELECT
                groupNumber,
                effectiveDate,
                TheRank,
                NewExpiredDate = LEAD(effectiveDate) OVER (ORDER BY TheRank)
            FROM #temp
            WHERE DeleteSet = 0
        ) t2
            ON t2.groupNumber = rp.groupNumber
           AND rp.effectiveDate = t2.effectiveDate
        INNER JOIN #temp t
            ON t.TheRank = t2.TheRank + 1
        WHERE rp.groupNumber = @groupNumber
          AND t2.NewExpiredDate IS NOT NULL
          AND rp.expiredDate <> t2.NewExpiredDate
          AND t.DeleteSet = 1;

        PRINT 'No Errors ... Committing changes for ' + CAST(@groupNumber AS varchar(15));
        COMMIT;

        -- Short pause between groups.
        WAITFOR DELAY '00:00:00:005';

        FETCH NEXT FROM MY_CURSOR INTO @groupNumber;
    END;
END TRY
BEGIN CATCH
    -- Undo the half-finished group, release the cursor, then re-raise the
    -- original error to the caller.
    IF @@TRANCOUNT > 0
        ROLLBACK TRAN;

    CLOSE MY_CURSOR;
    DEALLOCATE MY_CURSOR;

    THROW;
END CATCH;

CLOSE MY_CURSOR;
DEALLOCATE MY_CURSOR;
GO