我一直在努力解决这个问题,我需要一些帮助。 我有以下查询:
-- Sample data for the question: ten period rows, each with a Start/Endd pair
-- (the values look like yyyymmdd dates stored as text) and a Col1/Col2 code pair.
CREATE TABLE Example
(
    Start NVARCHAR(8),
    Endd  NVARCHAR(8),
    Col1  NVARCHAR(2),
    Col2  NVARCHAR(2)
);

INSERT INTO Example (Start, Endd, Col1, Col2)
VALUES
    ('20130801', '20140316', '02', '01'),
    ('20140317', '20140319', '04', '02'),
    ('20140320', '20140320', '04', '02'),
    ('20140321', '20140421', '02', 'Z8'),
    ('20140422', '20140429', '02', 'Z9'),
    ('20140430', '20140902', '04', '02'),
    ('20140903', '20150201', '04', '02'),
    ('20150202', '20150223', '04', '02'),
    ('20150224', '20150527', '04', '02'),
    ('20150528', '99991231', '04', '02');
-- Asker's attempt: merge neighbouring rows that share the same Col1/Col2 pair.
-- Known flaw (the subject of the question): every row that has an equal
-- neighbour gets the constant group key 0, so separate runs of the same
-- (Col1, Col2) pair all fall into one group and come out as a single row.
SELECT
    MIN(Start) AS Start,
    MAX(Endd)  AS Endd,
    Col1,
    Col2
FROM
(
    -- TOP is what allows an ORDER BY inside a derived table; it does not
    -- guarantee the order of the outer result.
    SELECT TOP (100000000)
        Start,
        Endd,
        Col1,
        Col2,
        DENSE_RANK() OVER (PARTITION BY Col1, Col2 ORDER BY Start, Endd) AS rnk,
        LEAD(Col1) OVER (ORDER BY Start, Endd DESC) AS next_col1,
        LEAD(Col2) OVER (ORDER BY Start, Endd DESC) AS next_col2,
        LAG(Col1)  OVER (ORDER BY Start, Endd DESC) AS prev_col1,
        LAG(Col2)  OVER (ORDER BY Start, Endd DESC) AS prev_col2
    FROM Example AS src
    ORDER BY Start, Endd
) AS ranked
GROUP BY
    Col1,
    Col2,
    -- 0 when the following or the preceding row carries the same pair,
    -- otherwise the row's own rank within its pair.
    CASE
        WHEN (ranked.next_col1 = Col1 AND ranked.next_col2 = Col2)
          OR (ranked.prev_col1 = Col1 AND ranked.prev_col2 = Col2) THEN 0
        ELSE rnk
    END
ORDER BY Start, Endd;
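我的目标是合并这些数据(把相邻且 Col1、Col2 相同的时间段合并为一行),以获得以下结果:
Start     Endd      Col1  Col2
20130801  20140316  02    01
20140317  20140320  04    02
20140321  20140421  02    Z8
20140422  20140429  02    Z9
20140430  99991231  04    02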
但是,从查询结果中可以看到,当相同的 Col1 和 Col2 值出现在多个不相邻的时间段时,合并结果不正确:查询会把这些时间段全部合并成一条记录,导致后一个时间段的起止值出错。
有人能帮助我吗?
答案 0 :(得分:0)
您的查询已经很接近了,您现在可能已经找到了解决方案。这是一个典型的“间隙与孤岛”(gaps and islands)问题。下面我给出一个不使用 LEAD 和 LAG 的较长版本。您可以用这些窗口函数(可能再配合 DENSE_RANK)来替换下面大约 45% 的代码。
-- Same sample data as the question, held in a table variable so the answer's
-- queries can run in a single batch without creating a permanent table.
DECLARE @Example TABLE
(
    Start NVARCHAR(8),
    Endd  NVARCHAR(8),
    Col1  NVARCHAR(2),
    Col2  NVARCHAR(2)
);

INSERT INTO @Example (Start, Endd, Col1, Col2)
VALUES
    ('20130801', '20140316', '02', '01'),
    ('20140317', '20140319', '04', '02'),
    ('20140320', '20140320', '04', '02'),
    ('20140321', '20140421', '02', 'Z8'),
    ('20140422', '20140429', '02', 'Z9'),
    ('20140430', '20140902', '04', '02'),
    ('20140903', '20150201', '04', '02'),
    ('20150202', '20150223', '04', '02'),
    ('20150224', '20150527', '04', '02'),
    ('20150528', '99991231', '04', '02');
-- Merge consecutive rows of @Example that share the same Col1/Col2 pair into
-- one row per run ("island"), without using LEAD/LAG.
--
-- Steps, innermost first:
--   1. Number the rows in Start, Endd, Col1, Col2 order (TableID).
--   2. Flag a row as Changed = 1 when the row immediately before it has a
--      different Col1/Col2 pair, or when there is no previous row.
--   3. Join EVERY row to the flagged rows with the same pair at or before it;
--      the largest such TableID (ChangeID) identifies the island the row is in.
--   4. Aggregate per island: earliest Start, latest Endd.
--
-- Output: TableID (position of the island's last row), Col1, Col2, Start, Endd,
-- ordered by TableID.
-- NOTE: rows whose Col1 or Col2 is NULL never satisfy the equality joins and
-- are therefore left out of the result.
SELECT
    TableID = MAX(TableID),
    Col1    = MAX(Col1),
    Col2    = MAX(Col2),
    Start   = MIN(Start),
    Endd    = MAX(Endd)
FROM
(
    -- One row per source row, tagged with the TableID of its island start.
    SELECT
        TableID, Col1, Col2, Start, Endd,
        ChangeID = MAX(ChangeOnlyTableID)
    FROM
    (
        SELECT
            AllRecords.TableID, AllRecords.Col1, AllRecords.Col2,
            AllRecords.Start, AllRecords.Endd,
            ChangeOnlyTableID = ChangesOnly.TableID
        FROM
        (
            -- All rows, not only the island starts: the outer MIN/MAX must see
            -- every row of an island, otherwise Endd would come from the
            -- island's first row instead of its last.
            SELECT
                This.Start, This.Endd, This.TableID, This.Col1, This.Col2,
                Changed = CASE WHEN (Prev.Col1 = This.Col1 AND Prev.Col2 = This.Col2) THEN 0 ELSE 1 END
            FROM
            (
                SELECT TableID = ROW_NUMBER() OVER (ORDER BY Start, Endd, Col1, Col2), Start, Endd, Col1, Col2
                FROM @Example
            ) AS This
            LEFT OUTER JOIN
            (
                SELECT TableID = ROW_NUMBER() OVER (ORDER BY Start, Endd, Col1, Col2), Start, Endd, Col1, Col2
                FROM @Example
            ) AS Prev
                -- Prev is the row numbered one less than This.
                ON This.TableID = Prev.TableID + 1
        ) AS AllRecords
        INNER JOIN
        (
            -- Only the rows that start a new island.
            SELECT
                ChangeMarkers.TableID, ChangeMarkers.Col1, ChangeMarkers.Col2
            FROM
            (
                SELECT
                    This.Start, This.Endd, This.TableID, This.Col1, This.Col2,
                    Changed = CASE WHEN (Prev.Col1 = This.Col1 AND Prev.Col2 = This.Col2) THEN 0 ELSE 1 END
                FROM
                (
                    SELECT TableID = ROW_NUMBER() OVER (ORDER BY Start, Endd, Col1, Col2), Start, Endd, Col1, Col2
                    FROM @Example
                ) AS This
                LEFT OUTER JOIN
                (
                    SELECT TableID = ROW_NUMBER() OVER (ORDER BY Start, Endd, Col1, Col2), Start, Endd, Col1, Col2
                    FROM @Example
                ) AS Prev
                    ON This.TableID = Prev.TableID + 1
            ) AS ChangeMarkers
            WHERE ChangeMarkers.Changed = 1
        ) AS ChangesOnly
            ON  ChangesOnly.Col1 = AllRecords.Col1
            AND ChangesOnly.Col2 = AllRecords.Col2
            AND ChangesOnly.TableID <= AllRecords.TableID
    ) AS JoinedResults
    GROUP BY
        TableID, Col1, Col2, Start, Endd
) AS Final
GROUP BY
    Col1, Col2, ChangeID
ORDER BY
    MAX(TableID);
您也可以用几个 CTE 把它缩短为一个更简洁的查询,例如:
-- CTE form of the island merge over @Example: number the rows, flag the rows
-- that start a new Col1/Col2 run, attach each row to its most recent run
-- start, then collapse every run to a single row.
;WITH NumberedRows AS
(
    -- Position of each row in Start, Endd, Col1, Col2 order.
    SELECT
        ROW_NUMBER() OVER (ORDER BY Start, Endd, Col1, Col2) AS TableID,
        Start,
        Endd,
        Col1,
        Col2
    FROM @Example
),
FlaggedRows AS
(
    -- Changed = 1 when the preceding row has a different pair (or is absent).
    SELECT
        Cur.Start,
        Cur.Endd,
        Cur.TableID,
        Cur.Col1,
        Cur.Col2,
        CASE
            WHEN Prev.Col1 = Cur.Col1 AND Prev.Col2 = Cur.Col2 THEN 0
            ELSE 1
        END AS Changed
    FROM NumberedRows AS Cur
    LEFT JOIN NumberedRows AS Prev
        ON Cur.TableID = Prev.TableID + 1
),
IslandStarts AS
(
    SELECT
        TableID,
        Col1,
        Col2
    FROM FlaggedRows
    WHERE Changed = 1
),
RowsWithIsland AS
(
    -- ChangeID = TableID of the latest run start with the same pair that is
    -- at or before the row.
    SELECT
        R.TableID,
        R.Col1,
        R.Col2,
        R.Start,
        R.Endd,
        MAX(S.TableID) AS ChangeID
    FROM FlaggedRows AS R
    INNER JOIN IslandStarts AS S
        ON  S.Col1 = R.Col1
        AND S.Col2 = R.Col2
        AND S.TableID <= R.TableID
    GROUP BY
        R.TableID,
        R.Col1,
        R.Col2,
        R.Start,
        R.Endd
)
SELECT
    MAX(TableID) AS TableID,
    MAX(Col1)    AS Col1,
    MAX(Col2)    AS Col2,
    MIN(Start)   AS Start,
    MAX(Endd)    AS Endd
FROM RowsWithIsland
GROUP BY
    Col1,
    Col2,
    ChangeID
ORDER BY
    MAX(TableID)
还有一些借助虚拟键的巧妙技巧可以进一步运用,但我选择了最直接、也更冗长的写法。您应该可以使用DENSE_RANK()
LEAD()
或LAG()