根据日期重叠对行进行分组

时间:2017-03-24 07:33:16

标签: sql-server tsql sql-server-2014 gaps-and-islands

在同一个 ID 内,如果多行的生效日期(EffectiveDate)到结束日期(EndDate)的区间相互重叠,则需要把这些行归入同一个唯一的分组编号(DateGroup)

在下图中,dategroup是所需的输出列

数据按 ID ASC、EffectiveDate ASC、EndDate DESC 的顺序排序

here

{{1}}

7 个答案:

答案 0 :(得分:1)

这个答案的思路是:先找出那些应当使 DateGroup 计数器加 1 的记录,并给这些记录赋值 1(其余记录赋值 0)。有了这个标记之后,只需对它求累计和,就可以得到 DateGroup

-- Assigns a DateGroup number to every row of yourTable. Rows of the same ID
-- whose [EffectiveDate, EndDate] ranges overlap, directly or through a chain
-- of other rows, share one number. Numbers grow with ID, then EffectiveDate.
--
-- Result columns: ID, EffectiveDate, EndDate, DateGroup.
--
-- Step 1 (flagged): within each ID, walk the rows by EffectiveDate and compare
-- each row's EffectiveDate with the latest EndDate among all preceding rows.
-- A row that starts after everything before it has ended opens a new group
-- and gets amount = 1; every other row gets amount = 0. Comparing against the
-- running maximum (not just the first or the previous row) keeps a short range
-- nested inside a long one from splitting the group.
WITH flagged AS (
    SELECT
        ID,
        EffectiveDate,
        EndDate,
        CASE
            -- For the first row of an ID the frame is empty, MAX() is NULL,
            -- the comparison is UNKNOWN and the ELSE branch yields 1.
            WHEN EffectiveDate <= MAX(EndDate) OVER (
                     PARTITION BY ID
                     ORDER BY EffectiveDate, EndDate DESC
                     ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
                 )
            THEN 0
            ELSE 1
        END AS amount
    FROM yourTable
)
-- Step 2: the running total of the flags is the group number. The frame is
-- spelled out as ROWS because the default RANGE frame treats tied rows as
-- peers. "amount DESC" makes a group-opening row sort before any other row
-- that shares its ID and EffectiveDate, so ties cannot leak into the
-- previous group.
SELECT
    ID,
    EffectiveDate,
    EndDate,
    SUM(amount) OVER (
        ORDER BY ID, EffectiveDate, amount DESC, EndDate DESC
        ROWS UNBOUNDED PRECEDING
    ) AS DateGroup
FROM flagged
ORDER BY ID, EffectiveDate, EndDate DESC;

输出:

enter image description here

在这里演示:

Rextester

使用的数据:

-- Sample data for the DateGroup queries: seven date ranges across two IDs.
CREATE TABLE yourTable (ID int, EffectiveDate datetime, EndDate datetime);

-- The column list is explicit so the insert does not depend on column order.
-- Literals use the ISO 8601 form (YYYY-MM-DDThh:mm:ss.mmm): for the datetime
-- type the 'YYYY-MM-DD hh:mm:ss' form is interpreted according to the session's
-- DATEFORMAT / language setting, while the 'T' form is not.
INSERT INTO yourTable (ID, EffectiveDate, EndDate)
VALUES
    (1, '2017-01-01T00:00:00.000', '2017-01-11T00:00:00.000'),
    (1, '2017-01-02T00:00:00.000', '2017-01-05T00:00:00.000'),
    (1, '2017-01-03T00:00:00.000', '2017-01-12T00:00:00.000'),
    (1, '2017-01-06T00:00:00.000', '2017-01-09T00:00:00.000'),
    (1, '2017-01-13T00:00:00.000', '2017-01-19T00:00:00.000'),
    (2, '2017-02-01T00:00:00.000', '2017-02-11T00:00:00.000'),
    (2, '2017-02-06T00:00:00.000', '2017-02-16T00:00:00.000');

答案 1 :(得分:1)

试试这个,希望它有所帮助。不是最有吸引力的代码,但它应该工作。如果我找到一些时间,我可以稍后清理它。

-- Derives a DateGroup per row of ##DataTable in three CTE steps and a final
-- DENSE_RANK. Result columns: id, EffectiveDate, Enddate, DateGroup.

-- Step one: self left join. Each row "a" is paired with every row "b" whose
-- [EffectiveDate, EndDate] range contains a.EffectiveDate and whose
-- EffectiveDate and EndDate both differ from a's. Rows without such a match
-- keep NULL in the OverLap* columns. SeqNo numbers the joined rows by
-- id, EffectiveDate, Enddate.
-- NOTE(review): the join has no predicate on id (the only id condition is
-- commented out), so a row can be matched with rows of a different id if
-- their dates overlap -- confirm whether that is intended.
;WITH cte_StepOne as
(
SELECT ROW_NUMBER() OVER (ORDER BY a.[id],
    a.[EffectiveDate],
    a.[Enddate]) AS SeqNo,
    a.[id],
    a.[EffectiveDate],
    a.[Enddate],
    b.[id] AS OverLapID,
    b.[EffectiveDate] AS [OverLapEffectiveDate],
    b.[Enddate] AS [OverLapEnddate]
FROM ##DataTable a
LEFT JOIN ##DataTable b
ON a.EffectiveDate BETWEEN b.EffectiveDate
        AND b.EndDate
    AND a.EffectiveDate <> b.EffectiveDate
    AND a.EndDate <> b.EndDate --and a.ID <> b.ID
)
-- Step two: adds two neighbour values in SeqNo order.
--   LeadValue   = OverLapEffectiveDate of the NEXT row (LEAD).
--   LeadValueID = id of the PREVIOUS row (computed with LAG despite the
--                 "Lead" in its name).
,cte_StepTwo AS
(
SELECT SeqNo,
    id,
    EffectiveDate,
    Enddate,
    LEAD(OverLapEffectiveDate, 1) OVER (ORDER BY SeqNo) AS LeadValue,LAG(id, 1) OVER (ORDER BY SeqNo) AS LeadValueID,
    OverLapID,
    OverLapEffectiveDate,
    OverLapEnddate
FROM cte_StepOne
)
-- Step three: fills in the OverLap* columns for rows that had no match.
--   * Unmatched row whose EffectiveDate equals the next row's overlap start:
--     the row's own id / EffectiveDate / Enddate are used.
--   * Any other row with a NULL OverLapID: previous row's id + 1 is used as
--     OverLapID (for the first row LeadValueID is NULL, so the result is NULL).
--   * Otherwise the matched row's values are kept.
-- NOTE(review): OverLapID mixes real id values with "previous id + 1";
-- whether this always yields a distinct key per island is not evident from
-- the code -- verify on data beyond the sample.
,cte_Result AS
(
SELECT id,
    EffectiveDate,
    Enddate,
    CASE 
        WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN ID
        WHEN OverLapID IS NULL THEN LeadValueID + 1
        ELSE OverLapID
    END AS OverLapID,
    CASE 
        WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN EffectiveDate
        ELSE OverLapEffectiveDate
    END AS OverLapEffectiveDate,
    CASE 
        WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN Enddate
        ELSE OverLapEnddate
    END AS OverLapEnddate
FROM cte_StepTwo
)
-- Final step: DateGroup is the dense rank of (ID, OverLapID). DISTINCT removes
-- the duplicate output rows produced when one row matched several partners in
-- step one and those duplicates received the same DateGroup.
SELECT DISTINCT id,
    EffectiveDate,
    Enddate,
    DENSE_RANK() OVER (ORDER BY ID,OverLapID) AS DateGroup
FROM cte_Result
ORDER BY id,EffectiveDate

结果:

enter image description here

答案 2 :(得分:1)

这个怎么样?它比已发布的其他解决方案更简单:

-- Groups the rows of #DataTable by whether they overlap the earliest row of
-- their id. Result columns: id, Effectivedate, Enddate, DateGroup.
--
-- NOTE(review): every row is compared only with the FIRST row of its id and
-- the flag has two values, so an id can be split into at most two groups.
-- Data with three or more separate islands per id needs a running-max
-- approach instead.
WITH
-- Numbers each id's rows by EffectiveDate, EndDate; rn = 1 is the earliest row.
CTE_GetFirstRecordForEachId AS
(
    SELECT
        id,
        EffectiveDate,
        Enddate,
        rn = ROW_NUMBER() OVER (PARTITION BY id ORDER BY EffectiveDate, EndDate)
    FROM
        #DataTable
),

-- Flags each row "a" with OutOfDateRange = 0 when its range overlaps the range
-- of the first row "b" of the same id, and 1 otherwise.
CTE_GetOutOfDateRange AS
(
    SELECT
        a.id,
        a.EffectiveDate,
        a.Enddate,
        OutOfDateRange =
            CASE
                -- Two closed ranges overlap exactly when each one starts no
                -- later than the other one ends. The earlier version compared
                -- b.EffectiveDate and b.Enddate with b.Enddate itself, which
                -- is always true and hid the intended test.
                WHEN a.EffectiveDate <= b.Enddate AND a.Enddate >= b.EffectiveDate
                    THEN 0
                ELSE 1
            END
    FROM
        #DataTable a
    INNER JOIN
        CTE_GetFirstRecordForEachId b ON a.id = b.id AND b.rn = 1
)

-- DateGroup is the dense rank of (id, flag): overlapping rows of an id rank
-- before its non-overlapping rows, and ids are ranked in ascending order.
SELECT
    id,
    Effectivedate,
    Enddate,
    DateGroup = DENSE_RANK() OVER (ORDER BY id, OutOfDateRange)
FROM
    CTE_GetOutOfDateRange
ORDER BY
    id, Effectivedate, Enddate

输出:

    id          Effectivedate           Enddate                 DateGroup
----------- ----------------------- ----------------------- --------------------
1           2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1           2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1           2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1           2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1           2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2           2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2           2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3

答案 3 :(得分:0)

这个怎么样(我还在测试它)

// Collect every key of the first record in `cars` except "name"; these keys
// become the dimensions handed to the `x` scale.
dimensions = d3.keys(cars[0]).filter(function(key) {
    return !(key == "name");
});

x.domain(dimensions);

// Build one linear scale per dimension and store it in `y` under the
// dimension's key. The scale's domain is the extent of that dimension's values
// across `cars` (unary plus coerces each value to a number); its range runs
// from `height` down to 0.
dimensions.forEach(function(dim) {
    var valueExtent = d3.extent(cars, function(row) {
        return +row[dim];
    });
    y[dim] = d3.scale.linear().domain(valueExtent).range([height, 0]);
});

输出:

-- Recursive CTE that walks #DataTable one row at a time, in
-- ID, EffectiveDate, ENDDate DESC order, carrying a DATEGROUP counter.
-- Result columns: ID, EffectiveDate, Enddate, DATEGROUP.
--
-- NOTE(review): each recursion level handles one row and no
-- OPTION (MAXRECURSION ...) hint is visible here, so the server's default
-- recursion limit applies -- confirm this is acceptable for the real row count.
WITH Z AS 
  -- Anchor member: the first row overall (RN = 1) starts with DATEGROUP = 1.
  -- The *_Prec columns hold the previous row's values within the same ID
  -- (LAG), and are only carried along so both members have the same shape.
  (SELECT * FROM (SELECT ID, [EffectiveDate], ENDDate
    , LAG(ID) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS ID_Prec
    , LAG(EffectiveDate) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS EffDate_Prec
    , LAG(ENDDate) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS EndDate_Prec
    , ROW_NUMBER() OVER (ORDER BY ID, EffectiveDate,ENDDate DESC) AS RN
    , 1 AS DATEGROUP
     FROM #DataTable ) C WHERE RN = 1     
    UNION ALL
     -- Recursive member: picks the row whose RN follows the row already in Z.
     SELECT A.ID, A.EffectiveDate, A.Enddate
        , A.ID_Prec, A.EffDate_Prec
        , A.EndDate_Prec
        , A.RN
        -- Keep the current DATEGROUP when the row has a predecessor in the same
        -- ID (ID_Prec is non-NULL and equal to ID) and starts no later than
        -- that predecessor ends; otherwise move on to the next group number.
        -- NOTE(review): only the immediately preceding row's end date is
        -- checked, not the latest end date seen so far -- a row following a
        -- short range nested inside an earlier, longer one may start a new
        -- group even though it still overlaps the longer range. Verify.
        , CASE WHEN  A.ID = A.ID_PREC AND (A.EffectiveDate <=A.EndDate_Prec /* OR A.EndDate>=A.EffDate_Prec*/) THEN Z.DATEGROUP 
          ELSE Z.DATEGROUP+1 END AS DATEGROUP
        -- Same numbering and LAG columns as the anchor, computed over the
        -- whole table so that any RN can be looked up.
        FROM (SELECT A.ID, A.EffectiveDate, A.ENDDate
            , LAG(A.ID) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS ID_Prec
            , LAG(A.EffectiveDate) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS EffDate_Prec
            , LAG(A.ENDDate) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS EndDate_Prec
            , ROW_NUMBER() OVER (ORDER BY A.ID, A.EffectiveDate,A.ENDDate DESC) AS RN
            , 1 AS DATEGROUP
            FROM #DataTable A) A
     INNER JOIN Z ON A.RN -1= Z.RN
) 
SELECT ID, EffectiveDate, Enddate, DATEGROUP FROM Z

答案 4 :(得分:0)

这可能会对你有所帮助。我在这里发布了最短最简单的tsql版本......

-- Ranks the rows of #DataTable by id and by whether each row's date range
-- touches the range of the row before it (same id, ordered by EffectiveDate,
-- Enddate). Result columns: id, EffectiveDate, Enddate, DateGroup.
WITH with_previous AS (
    -- Attach the previous row's range to every row. The first row of an id has
    -- no predecessor, so it falls back to its own dates.
    SELECT
        id,
        EffectiveDate,
        Enddate,
        ISNULL(
            LAG(EffectiveDate) OVER (PARTITION BY id ORDER BY EffectiveDate, Enddate),
            EffectiveDate
        ) AS PreviousEffDate,
        ISNULL(
            LAG(Enddate) OVER (PARTITION BY id ORDER BY EffectiveDate, Enddate),
            Enddate
        ) AS PreviousEndDate
    FROM #DataTable
),
with_flag AS (
    -- is_apart = 0 when either range contains an endpoint of the other one,
    -- 1 otherwise.
    SELECT
        id,
        EffectiveDate,
        Enddate,
        CASE
            WHEN (EffectiveDate >= PreviousEffDate AND EffectiveDate <= PreviousEndDate)
              OR (Enddate >= PreviousEffDate AND Enddate <= PreviousEndDate)
              OR (PreviousEffDate >= EffectiveDate AND PreviousEffDate <= Enddate)
              OR (PreviousEndDate >= EffectiveDate AND PreviousEndDate <= Enddate)
            THEN 0
            ELSE 1
        END AS is_apart
    FROM with_previous
)
SELECT
    id,
    EffectiveDate,
    Enddate,
    DENSE_RANK() OVER (ORDER BY id, is_apart) AS DateGroup
FROM with_flag

结果:

ResultSet

答案 5 :(得分:0)

我猜你的示例数据中缺少一些测试场景。

-- Walks #DataTable row by row with a recursive CTE and assigns New_ID, a
-- running group number that increases whenever a row starts a new id or
-- starts after every earlier range of the same id has ended.
-- Result columns: id, effectivedate, enddate, rn, New_ID.
-- Side effect: #DataTable is dropped at the end, as in the original script.
;WITH CTE AS
(
    -- Number the rows in processing order. enddate DESC breaks ties between
    -- rows sharing id and effectivedate so the numbering is repeatable.
    SELECT
        id,
        effectivedate,
        enddate,
        ROW_NUMBER() OVER (ORDER BY id, effectivedate, enddate DESC) AS rn
    FROM #DataTable
)
, CTE1 AS
(
    -- Anchor: the first row opens group 1. max_enddate tracks the latest end
    -- date seen so far in the current id.
    SELECT
        id,
        effectivedate,
        enddate,
        rn,
        enddate AS max_enddate,
        1 AS New_ID
    FROM CTE
    WHERE rn = 1

    UNION ALL

    -- Recursive step: take the next row (rn + 1) and compare it with the
    -- state carried from the previous row. No upper bound on rn is applied,
    -- so tables of any size are processed completely.
    SELECT
        c.id,
        c.effectivedate,
        c.enddate,
        c.rn,
        -- Extend the running maximum within an id; restart it on a new id.
        CASE
            WHEN c.id = c1.id AND c1.max_enddate > c.enddate THEN c1.max_enddate
            ELSE c.enddate
        END,
        -- Same id and starting no later than the latest end seen so far means
        -- the row still overlaps the current group. Comparing with the running
        -- maximum (not only the previous row's enddate) keeps a short range
        -- nested in a longer one from ending the group early.
        CASE
            WHEN c.id = c1.id AND c.effectivedate <= c1.max_enddate THEN c1.New_ID
            ELSE c1.New_ID + 1
        END
    FROM CTE c
    INNER JOIN CTE1 c1
        ON c.rn = c1.rn + 1
)
SELECT
    id,
    effectivedate,
    enddate,
    rn,
    New_ID
FROM CTE1
ORDER BY rn
-- One recursion level is used per row; 0 lifts the default limit of 100.
OPTION (MAXRECURSION 0);

DROP TABLE #DataTable;

答案 6 :(得分:0)

从另一个论坛得到这个;根据我的要求改变。看起来简单而有效。

-- Gaps-and-islands grouping of #DataTable: rows of the same ID whose
-- [EffectiveDate, EndDate] ranges overlap share one DG number.
-- Result columns: ID, EffectiveDate, EndDate, DG.
WITH C1 AS (
    SELECT
        ID,
        EffectiveDate,
        EndDate,
        -- isstart = 1 when the row begins after every earlier row of its ID
        -- has ended (or has no earlier row: MAX over the empty frame is NULL,
        -- so the ELSE branch applies); 0 otherwise.
        -- A NULL EndDate is treated as open-ended. '99991231' is the
        -- unseparated yyyymmdd form, which is read the same way regardless of
        -- the session's DATEFORMAT / language.
        -- EndDate DESC makes the row order repeatable when EffectiveDate ties.
        CASE
            WHEN EffectiveDate <= MAX(ISNULL(EndDate, '99991231')) OVER (
                     PARTITION BY ID
                     ORDER BY EffectiveDate, EndDate DESC
                     ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
                 )
            THEN 0
            ELSE 1
        END AS isstart
    FROM #DataTable
)
-- The running total of isstart is the group number. Ordering by ID alone left
-- the order inside an ID undefined, so the total could be accumulated in any
-- order. The full key below walks rows in date order, and "isstart DESC" puts
-- a group-opening row ahead of other rows with the same ID and EffectiveDate.
SELECT
    ID,
    EffectiveDate,
    EndDate,
    SUM(isstart) OVER (
        ORDER BY ID, EffectiveDate, isstart DESC, EndDate DESC
        ROWS UNBOUNDED PRECEDING
    ) AS DG
FROM C1
ORDER BY ID, EffectiveDate, EndDate DESC;