SQL Server - 合并/分组连续时间戳

时间:2016-07-09 21:52:34

标签: sql sql-server

我有一张表(RECORDS),其中记录了每个子项从何时到何时关联到某个父项:

CHILD   STARTDATE                  ENDDATE                     PARENT
207     2013-12-18 12:45:59.017    2014-01-09 18:16:01.227     NULL
207     2014-01-09 18:16:01.227    2016-03-03 09:54:28.757     NULL
207     2016-03-03 09:54:28.757    2100-01-01 00:00:00.000     NULL
10558   2016-03-03 18:10:34.877    2016-04-05 10:25:22.860     10557
10558   2016-04-05 10:25:22.860    2016-04-05 11:33:10.493     10557
10558   2016-04-05 11:33:10.493    2100-01-01 00:00:00.000     10557
10596   2016-05-15 16:55:15.970    2016-05-16 10:00:00.000     10592
10596   2016-05-16 10:00:00.000    2016-05-17 10:00:00.000     10593
10596   2016-05-17 10:00:00.000    2100-01-01 00:00:00.000     10592
10600   2012-12-18 12:45:59.017    2100-01-01 00:00:00.000     10599

上面的数据展示了 4 种主要情况:
同一子项有多条连续记录,父项均为空值(207)
同一子项有多条连续记录,父项值保持不变(10558)
同一子项有多条连续记录,父项发生了变化(10596)
同一子项只有一条记录(10600)。

由于子项可以改回之前用过的父项,使用下面的查询:

-- Naive collapse: one output row per (CHILD, PARENT) pair, spanning the
-- earliest STARTDATE to the latest ENDDATE found for that pair.
-- The grouping ignores time order, so separate periods in which a child
-- returns to an earlier parent are folded into a single row.
SELECT
    rec.CHILD,
    MIN(rec.STARTDATE) AS STARTDATE,
    MAX(rec.ENDDATE)   AS ENDDATE,
    rec.PARENT
FROM RECORDS AS rec
GROUP BY
    rec.CHILD,
    rec.PARENT

会使子项 10596 得到以下错误的结果:

207     2013-12-18 12:45:59.017     2100-01-01 00:00:00.000     NULL
10558   2016-03-03 18:10:34.877     2100-01-01 00:00:00.000     10557
10596   2016-05-15 16:55:15.970     2100-01-01 00:00:00.000     10592
10596   2016-05-16 10:00:00.000     2016-05-17 10:00:00.000     10593
10600   2012-12-18 12:45:59.017     2100-01-01 00:00:00.000     10599

期望的结果是:

207     2013-12-18 12:45:59.017     2100-01-01 00:00:00.000     NULL
10558   2016-03-03 18:10:34.877     2100-01-01 00:00:00.000     10557
10596   2016-05-15 16:55:15.970     2016-05-16 10:00:00.000     10592
10596   2016-05-16 10:00:00.000     2016-05-17 10:00:00.000     10593
10596   2016-05-17 10:00:00.000     2100-01-01 00:00:00.000     10592
10600   2012-12-18 12:45:59.017     2100-01-01 00:00:00.000     10599

知道如何实现这个目标吗?

2 个答案:

答案 0 :(得分:0)

以下假设您的记录中存在连续性(无重叠):

-- Collapses consecutive RECORDS rows of a child that share the same PARENT
-- (NULL counts as equal to NULL) into a single period.
-- Relies on each child's rows being contiguous and non-overlapping when
-- ordered by STARTDATE.
WITH numbered AS
(
    -- Position of every row inside its child's timeline.
    SELECT
        src.CHILD,
        src.STARTDATE,
        src.ENDDATE,
        src.PARENT,
        ROW_NUMBER() OVER (PARTITION BY src.CHILD ORDER BY src.STARTDATE) AS rn
    FROM RECORDS AS src
),
run_ends AS
(
    -- Keep only the rows that close a run: either the child has no later
    -- row, or the next row has a different PARENT. r2 is the position of
    -- the row that opens the following run (NULL for the child's last run).
    SELECT
        cur.CHILD   AS CHILD1,
        cur.ENDDATE AS ENDDATE1,
        cur.PARENT  AS PARENT1,
        cur.rn      AS r1,
        nxt.rn      AS r2
    FROM numbered AS cur
    LEFT JOIN numbered AS nxt
        ON  nxt.CHILD = cur.CHILD
        AND nxt.rn = cur.rn + 1
    WHERE
        nxt.rn IS NULL
        OR cur.PARENT <> nxt.PARENT
        OR (cur.PARENT IS NULL AND nxt.PARENT IS NOT NULL)
        OR (cur.PARENT IS NOT NULL AND nxt.PARENT IS NULL)
)
SELECT
    seg.CHILD1 AS CHILD,
    -- Start of the run: the earliest same-parent STARTDATE that is not
    -- before the ENDDATE of the previous run's closing row. The child's
    -- first run has no previous closing row, so the inner lookup yields
    -- NULL and the child's overall earliest STARTDATE is used instead.
    ISNULL(
        (
            SELECT MIN(rec.STARTDATE)
            FROM RECORDS AS rec
            WHERE rec.CHILD = seg.CHILD1
              AND (rec.PARENT = seg.PARENT1 OR (rec.PARENT IS NULL AND seg.PARENT1 IS NULL))
              AND rec.STARTDATE >=
                  (
                      -- ENDDATE of the closing row whose successor opens
                      -- the current run.
                      SELECT prev.ENDDATE1
                      FROM run_ends AS prev
                      WHERE prev.CHILD1 = seg.CHILD1
                        AND prev.r2 =
                            (
                                -- Latest run-opening position at or before
                                -- the current closing row.
                                SELECT MAX(opener.r2)
                                FROM run_ends AS opener
                                WHERE opener.CHILD1 = seg.CHILD1
                                  AND opener.r2 <= seg.r1
                            )
                  )
        ),
        (
            SELECT MIN(first_rec.STARTDATE)
            FROM RECORDS AS first_rec
            WHERE first_rec.CHILD = seg.CHILD1
        )
    ) AS STARTDATE,
    seg.ENDDATE1 AS ENDDATE,
    seg.PARENT1  AS PARENT
FROM run_ends AS seg

改用临时表后,它运行得很快:

-- Same run-merging logic as the pure-CTE form, but the run-closing rows are
-- stored once in a temp table so the correlated lookups below read that
-- table instead of re-evaluating the CTE.
-- Relies on each child's rows being contiguous and non-overlapping when
-- ordered by STARTDATE.
WITH numbered AS
(
    -- Position of every row inside its child's timeline.
    SELECT
        src.CHILD,
        src.STARTDATE,
        src.ENDDATE,
        src.PARENT,
        ROW_NUMBER() OVER (PARTITION BY src.CHILD ORDER BY src.STARTDATE) AS rn
    FROM RECORDS AS src
)
-- Keep only the rows that close a run: either the child has no later row,
-- or the next row has a different PARENT (NULL vs. non-NULL counts as a
-- change). r2 is the position of the row opening the following run.
SELECT
    cur.CHILD   AS CHILD1,
    cur.ENDDATE AS ENDDATE1,
    cur.PARENT  AS PARENT1,
    cur.rn      AS r1,
    nxt.rn      AS r2
INTO #RECORDSTEMP
FROM numbered AS cur
LEFT JOIN numbered AS nxt
    ON  nxt.CHILD = cur.CHILD
    AND nxt.rn = cur.rn + 1
WHERE
    nxt.rn IS NULL
    OR cur.PARENT <> nxt.PARENT
    OR (cur.PARENT IS NULL AND nxt.PARENT IS NOT NULL)
    OR (cur.PARENT IS NOT NULL AND nxt.PARENT IS NULL);

SELECT
    seg.CHILD1 AS CHILD,
    -- Start of the run: the earliest same-parent STARTDATE that is not
    -- before the ENDDATE of the previous run's closing row. The child's
    -- first run has no previous closing row, so the inner lookup yields
    -- NULL and the child's overall earliest STARTDATE is used instead.
    ISNULL(
        (
            SELECT MIN(rec.STARTDATE)
            FROM RECORDS AS rec
            WHERE rec.CHILD = seg.CHILD1
              AND (rec.PARENT = seg.PARENT1 OR (rec.PARENT IS NULL AND seg.PARENT1 IS NULL))
              AND rec.STARTDATE >=
                  (
                      -- ENDDATE of the closing row whose successor opens
                      -- the current run.
                      SELECT prev.ENDDATE1
                      FROM #RECORDSTEMP AS prev
                      WHERE prev.CHILD1 = seg.CHILD1
                        AND prev.r2 =
                            (
                                -- Latest run-opening position at or before
                                -- the current closing row.
                                SELECT MAX(opener.r2)
                                FROM #RECORDSTEMP AS opener
                                WHERE opener.CHILD1 = seg.CHILD1
                                  AND opener.r2 <= seg.r1
                            )
                  )
        ),
        (
            SELECT MIN(first_rec.STARTDATE)
            FROM RECORDS AS first_rec
            WHERE first_rec.CHILD = seg.CHILD1
        )
    ) AS STARTDATE,
    seg.ENDDATE1 AS ENDDATE,
    seg.PARENT1  AS PARENT
FROM #RECORDSTEMP AS seg;

DROP TABLE #RECORDSTEMP

答案 1 :(得分:0)

感谢您的回答,它确实有效。

我也收到了这个解决方案:

-- Merges each child's consecutive records into one row per unbroken run.
-- A record continues the previous one (same CHILD, ordered by STARTDATE,
-- ENDDATE, PARENT) when its STARTDATE equals the previous ENDDATE and both
-- have the same PARENT, with NULL treated as equal to NULL.
--
-- Grouping is done per run rather than per (CHILD, PARENT). Grouping every
-- mergeable row by (CHILD, PARENT) alone would fuse two separate runs of the
-- same parent (A,A -> B -> A,A) into one period overlapping the B period.
--
-- Returns CHILD, STARTDATE, ENDDATE, PARENT; row order is unspecified.
WITH FLAGGEDRECORDS AS (
    SELECT
        CHILD,
        STARTDATE,
        ENDDATE,
        PARENT,
        -- 1 when this record opens a new run, 0 when it continues the
        -- previous record. A child's first record has no predecessor, so
        -- the LAG comparison is not true and it falls into ELSE 1.
        CASE
            WHEN STARTDATE = LAG(ENDDATE) OVER (PARTITION BY CHILD ORDER BY STARTDATE, ENDDATE, PARENT)
                 AND (
                        PARENT = LAG(PARENT) OVER (PARTITION BY CHILD ORDER BY STARTDATE, ENDDATE, PARENT)
                     OR (
                            PARENT IS NULL
                        AND LAG(PARENT) OVER (PARTITION BY CHILD ORDER BY STARTDATE, ENDDATE, PARENT) IS NULL
                        )
                     )
            THEN 0
            ELSE 1
        END AS StartsNewRun
    FROM RECORDS
),
RUNS AS (
    SELECT
        CHILD,
        STARTDATE,
        ENDDATE,
        PARENT,
        -- Running count of run starts = sequence number of the run this
        -- record belongs to within its child. ROWS (not the default RANGE)
        -- keeps the count strictly row-by-row when sort keys tie.
        SUM(StartsNewRun) OVER (
            PARTITION BY CHILD
            ORDER BY STARTDATE, ENDDATE, PARENT
            ROWS UNBOUNDED PRECEDING
        ) AS RunId
    FROM FLAGGEDRECORDS
)
SELECT
    CHILD,
    MIN(STARTDATE) AS STARTDATE,
    MAX(ENDDATE)   AS ENDDATE,
    PARENT
FROM RUNS
-- PARENT is constant inside a run; it is listed only so it can be selected.
GROUP BY
    CHILD,
    RunId,
    PARENT

这两种方案中,哪一种性能最好?