合并具有相同编号和相同金额的连续日期范围

时间:2019-11-06 15:41:37

标签: sql sql-server tsql

我有以下数据集,其中有身份证的人,其身份可以随时间变化,因此更改记录为日期范围。有时,日期范围存在空白,没有可用的信息。没事。但是我想合并连续的日期范围,当它们跨越多个记录(如ID = 1的第2行和第3行)时,它们恰好具有相同的金额。

DECLARE @DataTable TABLE (
    ID [int] NULL,
    StartDate [date] NULL,
    EndDate [date] NULL,
    Amount [decimal](12,2) NULL
)

INSERT INTO @DataTable
SELECT 1, '20180101','20180513', 10.00 UNION ALL
SELECT 1, '20180630','20190301', 15.00 UNION ALL
SELECT 1, '20190302','20190615', 15.00 UNION ALL
SELECT 1, '20190616','20991231', 5.00 UNION ALL
SELECT 2, '20190101','20190331', 35.00 UNION ALL
SELECT 2, '20190401','20191031', 30.00  UNION ALL
SELECT 3, '20180505','20180930', 19.00 UNION ALL
SELECT 3, '20181001','20190228', 1.00 UNION ALL
SELECT 3, '20190501','20190815', 1.00 UNION ALL
SELECT 3, '20190819','20190827', 5.00 UNION ALL
SELECT 3, '20190828','20991231', 1.00 UNION ALL
SELECT 4, '2017-10-01', '2017-12-31',   688.96 UNION ALL
SELECT 4, '2018-01-01', '2018-04-30',   707.96 UNION ALL
SELECT 4, '2018-05-01', '2018-05-31',   783.96 UNION ALL
SELECT 4, '2018-06-01', '2018-12-31',   707.96 UNION ALL
SELECT 4, '2019-01-01', '2019-03-31',   707.96 UNION ALL
SELECT 4, '2019-04-01', '2019-04-30',   571.46 UNION ALL
SELECT 4, '2019-05-01', '2019-06-30',   707.96 UNION ALL
SELECT 4, '2019-07-01', '2099-12-31',   707.96
;

Image of table

我通过使用dimDate在开始日期和结束日期之间生成日期行来解决了该问题,然后保留了记录,记录中的金额与以前的记录相比有所更改,或者ID的日期存在间隔。然后,我使用下一个记录日期作为结束日期。查询如下:

WITH DateList AS (
SELECT DT.*, DD.DateOnly AS RecordDate
FROM @DataTable DT
INNER JOIN dimDate DD ON DT.StartDate <= DD.DateOnly AND CASE WHEN DT.EndDate > GETDATE() THEN CONVERT(DATE,GETDATE()) ELSE DT.EndDate END >= DD.DateOnly
)

, PrevValue AS (
SELECT 
    *
    , LAG(RecordDate) OVER (PARTITION BY ID ORDER BY RecordDate) AS PrevDate
    , LAG(Amount) OVER (PARTITION BY ID ORDER BY RecordDate) AS PrevAmt
FROM DateList 
)
, KeepHistory AS (
SELECT 
    *
FROM PrevValue
WHERE Amount <> PrevAmt OR PrevAmt IS NULL OR DATEADD(DAY,1,PrevDate) <> RecordDate OR PrevDate IS NULL
)
, FINAL AS (
SELECT 
    *
    , LEAD(PrevDate) OVER (PARTITION BY ID ORDER BY StartDate) AS NextEndDate
FROM KeepHistory
)

SELECT 
    ID 
    , StartDate
    , CASE WHEN NextEndDate > EndDate THEN NextEndDate ELSE EndDate END AS EndDate
    , Amount
FROM FINAL 

Final result with row 2 and 3 are merged into 2

我的问题是有一种替代方法可以解决此问题,而无需经过dimDate或在开始日期和结束日期之间生成日期。我可以简单地通过使用诸如here之类的窗口函数和间隙问题来实现这一点吗?

如果您当前的解决方案有任何问题,请告诉我。 谢谢。

关于@Larnu响应,更新后的查询适用于大多数情况。我添加了ID = 4的示例,在合并其第四行和第五行时似乎会引起问题。当第六行的数量不同时,它也会合并第七行。

ID = 4 4th, 5th and 7th are merged; 6th one has different amount

2 个答案:

答案 0 :(得分:1)

这是你的追求吗?

WITH Gaps AS(
    SELECT DT.ID,
           DT.StartDate,
           DT.EndDate,
           DT.Amount,
           DATEDIFF(DAY,LAG(DATEADD(DAY,1,DT.EndDate),1,DT.StartDate) OVER (PARTITION BY DT.ID, DT.Amount ORDER BY DT.StartDate ASC), DT.StartDate) AS Gap
    FROM @DataTable DT),
Grps AS(
    SELECT G.ID,
           G.StartDate,
           G.EndDate,
           G.Amount,
           ROW_NUMBER() OVER (PARTITION BY G.ID ORDER BY G.StartDate) - 
           ROW_NUMBER() OVER (PARTITION BY G.ID,Amount ORDER BY G.StartDate) + Gap AS Grp
    FROM Gaps G)
SELECT G.ID,
       MIN(G.StartDate) AS StartDate,
       MAX(EndDate) AS EndDate,
       G.Amount
FROM Grps G
GROUP BY G.ID,
         G.Amount,
         G.Grp
ORDER BY ID,
         StartDate;

添加了唯一ID,以避开“功能”:

DECLARE @DataTable TABLE (
    UniqueID int IDENTITY(1,1),
    ID [int] NULL,
    StartDate [date] NULL,
    EndDate [date] NULL,
    Amount [decimal](12,2) NULL
)

INSERT INTO @DataTable
SELECT 1, '20180101','20180513', 10.00 UNION ALL
SELECT 1, '20180630','20190301', 15.00 UNION ALL
SELECT 1, '20190302','20190615', 15.00 UNION ALL
SELECT 1, '20190616','20991231', 5.00 UNION ALL
SELECT 2, '20190101','20190331', 35.00 UNION ALL
SELECT 2, '20190401','20191031', 30.00  UNION ALL
SELECT 3, '20180505','20180930', 19.00 UNION ALL
SELECT 3, '20181001','20190228', 1.00 UNION ALL
SELECT 3, '20190501','20190815', 1.00 UNION ALL
SELECT 3, '20190819','20190827', 5.00 UNION ALL
SELECT 3, '20190828','20991231', 1.00 UNION ALL
SELECT 4, '20171001', '20171231',   688.96 UNION ALL
SELECT 4, '20180101', '20180430',   707.96 UNION ALL
SELECT 4, '20180501', '20180531',   783.96 UNION ALL
SELECT 4, '20180601', '20181231',   707.96 UNION ALL
SELECT 4, '20190101', '20190331',   707.96 UNION ALL
SELECT 4, '20190401', '20190430',   571.46 UNION ALL
SELECT 4, '20190501', '20190630',   707.96 UNION ALL
SELECT 4, '20190701', '20991231',   707.96;

--SELECT *
--FROM @DataTable;

WITH Gaps AS(
    SELECT DT.UniqueID,
           DT.ID,
           DT.StartDate,
           DT.EndDate,
           DT.Amount,
           DATEDIFF(DAY,LAG(DATEADD(DAY,1,DT.EndDate),1,DT.StartDate) OVER (PARTITION BY DT.ID, DT.Amount ORDER BY DT.UniqueID ASC), DT.StartDate) AS Gap
    FROM @DataTable DT),
Grps AS(
    SELECT G.UniqueID,
           G.ID,
           G.StartDate,
           G.EndDate,
           G.Amount,
           G.Gap,
           ROW_NUMBER() OVER (PARTITION BY G.ID ORDER BY G.UniqueID) - 
           ROW_NUMBER() OVER (PARTITION BY G.ID,Amount ORDER BY G.UniqueID) + (Gap * UniqueID) AS Grp
    FROM Gaps G)
SELECT G.ID,
       MIN(G.StartDate) AS StartDate,
       MAX(EndDate) AS EndDate,
       G.Amount
FROM Grps G
GROUP BY G.ID,
         G.Amount,
         G.Grp
ORDER BY ID,
         StartDate;

答案 1 :(得分:0)

您可以按以下方式使用递归

    DECLARE @DataTable TABLE (
        ID [int] NULL,
        StartDate [date] NULL,
        EndDate [date] NULL,
        Amount [decimal](12,2) NULL
    )

    INSERT INTO @DataTable
    SELECT 1, '20180101','20180513', 10.00 UNION ALL
    SELECT 1, '20180630','20190301', 15.00 UNION ALL
    SELECT 1, '20190302','20190615', 15.00 UNION ALL
    SELECT 1, '20190616','20991231', 5.00 UNION ALL
    SELECT 2, '20190101','20190331', 35.00 UNION ALL
    SELECT 2, '20190401','20191031', 30.00  UNION ALL
    SELECT 3, '20180505','20180930', 19.00 UNION ALL
    SELECT 3, '20181001','20190228', 1.00 UNION ALL
    SELECT 3, '20190501','20190815', 1.00 UNION ALL
    SELECT 3, '20190819','20190827', 5.00 UNION ALL
    SELECT 3, '20190828','20991231', 1.00;


    ;with cte as(
    select t1.id,
t1.startDate,
t1.EndDate,
t1.Amount,
     row_number() over (partition by t1.id order by t1.Id,t1.startDate) R#  
from @DataTable t1
    left outer join @DataTable t2 on dateadd(d,-1,t1.startdate)=t2.enddate and t1.Amount=t2.Amount
    and t1.id=t2.id
    where t2.amount is null),
    cte1 as
    (select 
t1.id,
t1.startDate,
t1.EndDate,
t1.Amount,
t1.R# from cte t1

    union all

    select t1.id,
    t2.startdate,
t1.endDate, 
t1.amount,
R# from @DataTable t1 inner join cte1 t2
    on dateadd(d,-1,t1.startdate)=t2.enddate and t1.Amount=t2.Amount)
    select id,
min(stArtdate)[startdate],
max(enddate)[enddate],
min(amount)[amount] from cte1 GROUP by R#,id 
    Order  by id,max(enddate)