SQL Server 2012+:按时间段合并元素

时间:2017-03-29 15:21:36

标签: sql sql-server merge

我一直在努力解决这个问题,需要一些帮助。我有以下表结构、示例数据和查询:

-- Sample table for the question: one row per time period.
-- Start/Endd hold the period bounds as 8-character text; Col1/Col2 are the
-- 2-character attributes whose consecutive repeats should be merged.
CREATE TABLE Example
(
    Start NVARCHAR(8),
    Endd  NVARCHAR(8),
    Col1  NVARCHAR(2),
    Col2  NVARCHAR(2)
);

-- Ten sample periods. The ('04','02') pair appears in two separate runs
-- (rows 2-3 and rows 6-10), separated by the '02'/'Z8' and '02'/'Z9' rows.
INSERT INTO Example
    (Start, Endd, Col1, Col2)
VALUES
    ('20130801', '20140316', '02', '01'),
    ('20140317', '20140319', '04', '02'),
    ('20140320', '20140320', '04', '02'),
    ('20140321', '20140421', '02', 'Z8'),
    ('20140422', '20140429', '02', 'Z9'),
    ('20140430', '20140902', '04', '02'),
    ('20140903', '20150201', '04', '02'),
    ('20150202', '20150223', '04', '02'),
    ('20150224', '20150527', '04', '02'),
    ('20150528', '99991231', '04', '02');

-- Asker's attempt at merging periods that share (Col1, Col2).
-- The grouping key is 0 for every row whose neighbour (previous or next in
-- Start / Endd DESC order) has the same pair, and the per-pair dense rank
-- otherwise. Because the key is the same 0 for every such row, separate runs
-- of one pair fall into a single group -- this is the behaviour the question
-- is asking about.
SELECT
    MIN(Start) AS Start,
    MAX(Endd)  AS Endd,
    Col1,
    Col2
FROM
(
    SELECT TOP (100000000)
        Start,
        Endd,
        Col1,
        Col2,
        DENSE_RANK() OVER (PARTITION BY Col1, Col2 ORDER BY Start, Endd) AS pair_rank,
        LEAD(Col1)   OVER (ORDER BY Start, Endd DESC)                    AS next_col1,
        LEAD(Col2)   OVER (ORDER BY Start, Endd DESC)                    AS next_col2,
        LAG(Col1)    OVER (ORDER BY Start, Endd DESC)                    AS prev_col1,
        LAG(Col2)    OVER (ORDER BY Start, Endd DESC)                    AS prev_col2
    FROM Example AS sp
    ORDER BY Start, Endd
) AS ranked
GROUP BY
    Col1,
    Col2,
    CASE
        WHEN (ranked.next_col1 = Col1 AND ranked.next_col2 = Col2)
          OR (ranked.prev_col1 = Col1 AND ranked.prev_col2 = Col2)
        THEN 0
        ELSE ranked.pair_rank
    END
ORDER BY Start, Endd;

我的目标是合并这些数据以获得以下结果:

(原文此处为一张展示预期结果的图片,未能保留。)

但是,从查询结果可以看到,当相同的 Col1 和 Col2 值出现在多个互不相连的时间段时,合并结果不正确:查询会把这些时间段全部合并成一条记录,导致合并后的起止日期出错。

有人能帮助我吗?

1 个答案:

答案 0 :(得分:0)

您的查询已经很接近了,也许您现在已经找到了解决方案。这是一个典型的“间隙与孤岛”(gaps and islands)问题。下面我给出一个不使用 LEAD/LAG 的较长版本。您可以用这些窗口函数(可能再配合 DENSE_RANK)替换下面大约 45% 的代码。

-- Table-variable copy of the question's sample data, used by the queries below.
DECLARE @Example TABLE
(
    Start NVARCHAR(8),
    Endd  NVARCHAR(8),
    Col1  NVARCHAR(2),
    Col2  NVARCHAR(2)
);

INSERT INTO @Example
    (Start, Endd, Col1, Col2)
VALUES
    ('20130801', '20140316', '02', '01'),
    ('20140317', '20140319', '04', '02'),
    ('20140320', '20140320', '04', '02'),
    ('20140321', '20140421', '02', 'Z8'),
    ('20140422', '20140429', '02', 'Z9'),
    ('20140430', '20140902', '04', '02'),
    ('20140903', '20150201', '04', '02'),
    ('20150202', '20150223', '04', '02'),
    ('20150224', '20150527', '04', '02'),
    ('20150528', '99991231', '04', '02');

-- Gaps-and-islands merge over @Example (nested derived-table form).
-- Rows are numbered in Start, Endd, Col1, Col2 order; a "run" is a maximal
-- sequence of consecutive rows with the same (Col1, Col2). One output row is
-- produced per run:
--   TableID = row number of the last row in the run
--   Start   = smallest Start in the run, Endd = largest Endd in the run
-- Rows whose Col1 or Col2 is NULL never satisfy the equality joins below and
-- are therefore not returned.
SELECT
    TableID=MAX(TableID),Col1=MAX(Col1),Col2=MAX(Col2),Start=MIN(Start),Endd=MAX(Endd)
FROM
(
    -- One row per source row, tagged with ChangeID = row number of the row
    -- that starts its run (the latest run start of the same pair at or
    -- before this row).
    SELECT
        TableID,Col1,Col2,Start,Endd,ChangeID=MAX(ChangeOnlyTableID) 
    FROM 
    (
        SELECT
            AllRecords.TableID,AllRecords.Col1,AllRecords.Col2,AllRecords.Start,AllRecords.Endd,ChangeOnlyTableID=ChangesOnly.TableID
        FROM
        (
            -- Every source row, numbered. This side must NOT be restricted to
            -- Changed=1: the follow-on rows of a run have to reach the final
            -- aggregate, otherwise MAX(Endd) only sees the first row of each
            -- run and the merged period ends too early.
            SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM @Example 
        )
        AS AllRecords
        INNER JOIN 
        (
            -- Run starts only: rows whose (Col1, Col2) differs from the
            -- previous row, plus the very first row (no previous row, so the
            -- comparison is not true and the CASE falls through to 1).
            SELECT TableID,Col1,Col2 FROM
            (
                SELECT
                    This.TableID,This.Col1,This.Col2,
                    Changed=CASE WHEN (Prev.Col1=This.Col1 AND Prev.Col2=This.Col2) THEN 0 ELSE 1 END
                FROM
                    (
                        SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM @Example 
                    ) AS This
                    LEFT OUTER JOIN 
                    (
                        SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM @Example 
                    ) AS Prev ON Prev.TableID=This.TableID-1
            )
            AS ChangeMarkers
            WHERE Changed=1
        ) 
        AS ChangesOnly ON ChangesOnly.Col1=AllRecords.Col1 AND ChangesOnly.Col2=AllRecords.Col2 AND ChangesOnly.TableID<=AllRecords.TableID
    )AS JoinedResults
    GROUP BY
        TableID,Col1,Col2,Start,Endd
)
AS Final 
GROUP BY 
    Col1,Col2,ChangeID
ORDER BY
    MAX(TableID)

您也可以用几个 CTE 把它缩短成一个查询,例如:

-- CTE form of the gaps-and-islands merge over @Example.
-- Returns one row per run of consecutive rows sharing (Col1, Col2):
-- TableID (row number of the run's last row), Col1, Col2, earliest Start,
-- latest Endd, ordered by TableID.
;WITH NumberedRows AS
(
    -- Number the periods in Start, Endd, Col1, Col2 order.
    SELECT
        ROW_NUMBER() OVER (ORDER BY Start, Endd, Col1, Col2) AS TableID,
        Start,
        Endd,
        Col1,
        Col2
    FROM @Example
)
, MarkedRows AS
(
    -- Changed = 1 when the previous row has a different (Col1, Col2) pair or
    -- when there is no previous row; 0 when the pair repeats.
    SELECT
        Cur.Start,
        Cur.Endd,
        Cur.TableID,
        Cur.Col1,
        Cur.Col2,
        CASE
            WHEN Prev.Col1 = Cur.Col1 AND Prev.Col2 = Cur.Col2 THEN 0
            ELSE 1
        END AS Changed
    FROM NumberedRows AS Cur
    LEFT JOIN NumberedRows AS Prev
        ON Prev.TableID = Cur.TableID - 1
)
, RunStarts AS
(
    -- Only the rows that open a new run.
    SELECT
        TableID,
        Col1,
        Col2
    FROM MarkedRows
    WHERE Changed = 1
)
, RowsWithRunStart AS
(
    -- Tag each row with the row number of the latest run start of the same
    -- pair at or before it.
    SELECT
        r.TableID,
        r.Col1,
        r.Col2,
        r.Start,
        r.Endd,
        MAX(s.TableID) AS ChangeID
    FROM MarkedRows AS r
    INNER JOIN RunStarts AS s
        ON  s.Col1 = r.Col1
        AND s.Col2 = r.Col2
        AND s.TableID <= r.TableID
    GROUP BY
        r.TableID,
        r.Col1,
        r.Col2,
        r.Start,
        r.Endd
)
SELECT
    MAX(TableID) AS TableID,
    MAX(Col1)    AS Col1,
    MAX(Col2)    AS Col2,
    MIN(Start)   AS Start,
    MAX(Endd)    AS Endd
FROM RowsWithRunStart
GROUP BY
    Col1,
    Col2,
    ChangeID
ORDER BY
    MAX(TableID);

还可以借助虚拟键使用一些巧妙的技巧来进一步简化,但我选择了最直接、也更冗长的写法。您应该可以使用 DENSE_RANK()、LEAD() 和 LAG() 来改进这个查询。