我有一个以下格式的表
Id StartDate EndDate Type
1 2012-02-18 2012-03-18 1
1 2012-03-19 2012-06-19 1
1 2012-06-27 2012-09-27 1
1 2014-08-23 2014-09-24 3
1 2014-09-23 2014-10-24 3
1 2014-10-23 2014-11-24 3
2 2015-07-04 2015-08-06 1
2 2015-08-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
我在这里找到了类似的问题,但是没有什么可以帮助我解决问题的。我想合并具有相同的Id
,Type
和重叠的日期期间的行。但是,如果前14天的EndDate
和StartDate
之间存在差距,我想将其视为重叠。
上表的结果应该是
Id StartDate EndDate Type
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
在另一台服务器上,我可以做到以下限制和以下查询:
Type
列,而只关心Id
SELECT Id
, MIN(StartDate) AS StartDate
, MAX(EndDate) AS EndDate
FROM (
SELECT *
, SUM(CASE WHEN a.EndDate = a.StartDate THEN 0
ELSE 1
END
) OVER (ORDER BY Id, StartDate) sm
FROM (
SELECT Id
, StartDate
, EndDate
, LAG(EndDate, 1, NULL) OVER (PARTITION BY Id ORDER BY Id, EndDate) EndDate
FROM #temptable
) a
) b
GROUP BY Id, sm
任何建议我怎么做
答案 0 :(得分:7)
此方法使用附加的临时表来标识重叠日期的组,然后根据这些组执行快速汇总。
SELECT *, ROW_NUMBER() OVER (ORDER BY Id, Type) AS UID,
ROW_NUMBER() OVER (ORDER BY Id, Type) AS GroupId INTO #G FROM #TempTable
WHILE @@ROWCOUNT <> 0 BEGIN
UPDATE T1 SET
GroupId = T2.GroupId
FROM #G T1
INNER JOIN (
SELECT T1.UID, CASE WHEN T1.GroupId < T2.GroupId THEN T1.GroupId ELSE T2.GroupId END
FROM #G T1
LEFT OUTER JOIN #G T2
ON T1.Id = T2.Id AND T1.Type = T2.Type AND T1.GroupId <> T2.GroupId
AND T1.StartDate <= T2.EndDate AND T2.StartDate <= T1.EndDate
) T2 (UID, GroupId)
ON T1.UID = T2.UID
WHERE T1.GroupId <> T2.GroupId
END
SELECT Id, MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate, Type
FROM #G G GROUP BY GroupId, Id, Type
这将返回期望值
Id StartDate EndDate Type
----------- ---------- ---------- -----------
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
答案 1 :(得分:2)
这与2008年兼容。我认为,CTE实际上是链接所有重叠记录的最佳方法。日期重叠逻辑来自以下线程:SO Date Overlap
我添加了更复杂的额外数据,以确保其正常工作。
DECLARE @Data table (Id INT, StartDate DATE, EndDate DATE, Type INT)
INSERT INTO @data
SELECT 1,'2/18/2012' ,'3/18/2012', 1 UNION ALL
select 1,'3/17/2012','6/29/2012',1 UNION ALL
select 1,'6/27/2012','9/27/2012',1 UNION ALL
select 1,'8/23/2014','9/24/2014',3 UNION ALL
select 1,'9/23/2014','10/24/2014',3 UNION ALL
select 1,'10/23/2014','11/24/2014',3 UNION ALL
select 2,'7/4/2015','8/6/2015',1 UNION ALL
select 2,'8/4/2015','9/6/2015',1 UNION ALL
select 3,'11/1/2013','12/1/2013',0 UNION ALL
select 3,'1/9/2018','2/9/2018',0 UNION ALL
select 4,'1/1/2018','1/2/2018',0 UNION ALL --many non overlapping dates
select 4,'1/4/2018','1/5/2018',0 UNION ALL
select 4,'1/7/2018','1/9/2018',0 UNION ALL
select 4,'1/11/2018','1/13/2018',0 UNION ALL
select 4,'2/7/2018','2/8/2018',0 UNION ALL --many overlapping dates
select 4,'2/8/2018','2/9/2018',0 UNION ALL
select 4,'2/9/2018','2/10/2018',0 UNION all
select 4,'2/10/2018','2/11/2018',0 UNION all
select 4,'2/11/2018','2/12/2018',0 UNION all
select 4,'2/12/2018','2/13/2018',0 UNION all
select 4,'3/7/2018','3/8/2018',0 UNION ALL --many overlapping dates, second instance of id 4, type 0
select 4,'3/8/2018','3/9/2018',0 UNION ALL
select 4,'3/9/2018','3/10/2018',0 UNION all
select 4,'3/10/2018','3/11/2018',0 UNION all
select 4,'3/11/2018','3/12/2018',0 UNION all
select 4,'3/12/2018','3/13/2018',0
;
WITH cdata
AS (SELECT Id,
d.Type,
d.StartDate,
d.EndDate,
CurrentStart = d.StartDate
FROM @Data d
WHERE
NOT EXISTS (
SELECT * FROM @Data x WHERE x.StartDate < d.StartDate AND d.StartDate <= x.EndDate AND d.EndDate >= x.StartDate AND d.Id = x.Id AND d.Type = x.Type --get first records for overlapping ranges
)
UNION ALL
SELECT d.Id,
d.Type,
StartDate = CASE WHEN d2.StartDate < d.StartDate THEN d2.StartDate ELSE d.StartDate END,
EndDate = CASE WHEN d2.EndDate > d.EndDate THEN d2.EndDate ELSE d.EndDate END,
CurrentStart = d2.StartDate
FROM cdata d
INNER JOIN @Data d2
ON (
d.StartDate <= d2.EndDate
AND d.EndDate >= d2.StartDate
)
AND d2.Id = d.Id
AND d2.Type = d.Type
AND d2.StartDate > d.CurrentStart)
SELECT cdata.Id, cdata.Type, cdata.StartDate, EndDate = MAX(cdata.EndDate)
FROM cdata
GROUP BY cdata.Id, cdata.Type, cdata.StartDate
答案 2 :(得分:1)
这看起来像一个Packing Intervals问题。请参阅Itzik Ben-Gan的帖子,以获取所有详细信息以及他建议使用哪些索引来使其高效工作。他提出了一种无需递归CTE的解决方案。
两个音符。
下面的查询假定间隔为[closed;开放式),即t
是包含在内的内容,而StartDate
是排斥的。这种表示此类数据的方法通常是最方便的。 (在编程语言中,将数组从零开始而不是从一开始通常是更方便的。)
我添加了一个EndDate
列以进行明确的排序。
样本数据
RowID
推荐索引
DECLARE @T TABLE
(
RowID int IDENTITY,
id int,
StartDate date,
EndDate date,
tp int
);
INSERT INTO @T(Id, StartDate, EndDate, tp) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
-- Make EndDate an opened interval, make it exclusive
-- [Start; End)
UPDATE @T
SET EndDate = DATEADD(day, 1, EndDate)
;
查询
阅读Itzik的帖子以了解发生了什么。他在那里有很好的插图。简而言之,每个时间戳(开始或结束)都被视为一个事件。每个事件的类型为-- indexes to support solutions
CREATE UNIQUE INDEX idx_start_id ON T(id, tp, StartDate, RowID);
CREATE UNIQUE INDEX idx_end_id ON T(id, tp, EndDate, RowID);
或+
。每次遇到-
事件(开始一定的间隔)时,我们都会增加运行计数器。每次遇到+
事件(一些时间间隔结束),我们都会减少运行计数器。当运行计数器为0时,表示重叠间隔的条纹结束了。
我按原样使用Itzik的查询,只是更改了列名以匹配您的名字。
-
结果
WITH C1 AS
-- let e = end ordinals, let s = start ordinals
(
SELECT
RowID, id, tp, StartDate AS ts, +1 AS EventType,
NULL AS e,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY StartDate, RowID) AS s
FROM @T
UNION ALL
SELECT
RowID, id, tp, EndDate AS ts, -1 AS EventType,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY EndDate, RowID) AS e,
NULL AS s
FROM @T
),
C2 AS
-- let se = start or end ordinal, namely, how many events (start or end) happened so far
(
SELECT C1.*,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts, EventType DESC, RowID) AS se
FROM C1
),
C3 AS
-- For start events, the expression s - (se - s) - 1 represents how many sessions were active
-- just before the current (hence - 1)
--
-- For end events, the expression (se - e) - e represents how many sessions are active
-- right after this one
--
-- The above two expressions are 0 exactly when a group of packed intervals
-- either starts or ends, respectively
--
-- After filtering only events when a group of packed intervals either starts or ends,
-- group each pair of adjacent start/end events
(
SELECT id, tp, ts,
((ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts) - 1) / 2 + 1)
AS grpnum
FROM C2
WHERE COALESCE(s - (se - s) - 1, (se - e) - e) = 0
)
SELECT id, tp, MIN(ts) AS StartDate, DATEADD(day, -1, MAX(ts)) AS EndDate
FROM C3
GROUP BY id, tp, grpnum
ORDER BY id, tp, StartDate;
答案 3 :(得分:0)
create table #table
(Id int,StartDate date, EndDate date, Type int)
insert into #table
values
('1','2012-02-18','2012-03-18','1'),('1','2012-03-19','2012-06-19','1'),
('1','2012-06-27','2012-09-27','1'),('1','2014-08-23','2014-09-24','3'),
('1','2014-09-23','2014-10-24','3'),('1','2014-10-23','2014-11-24','3'),
('2','2015-07-04','2015-08-06','1'),('2','2015-08-04','2015-09-06','1'),
('3','2013-11-01','2013-12-01','0'),('3','2018-01-09','2018-02-09','0')
select ID,MIN(startdate)sd,MAX(EndDate)ed,type from #table
group by ID,TYPE,YEAR(startdate),YEAR(EndDate)
答案 4 :(得分:0)
这可以通过使用某些窗口功能和CTE轻松实现。这是解决方法
DECLARE @table TABLE
(id INT,
StartDate DATE,
EndDate DATE,
[Type] INT
);
INSERT INTO @table(Id, StartDate, EndDate, [Type]) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
WITH C1 AS
(
SELECT *,
MAX(EndDate) OVER(PARTITION BY Id, [Type]
ORDER BY StartDate, EndDate
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS PrevEnd
FROM @table
),
C2 AS
(
SELECT *,
SUM(StartFlag) OVER(PARTITION BY Id, [Type]
ORDER BY StartDate, EndDate
ROWS UNBOUNDED PRECEDING) AS GroupID
FROM C1
CROSS APPLY ( VALUES(CASE WHEN StartDate <= PrevEnd THEN NULL ELSE 1 END) ) AS A(StartFlag)
)
SELECT Id, [Type], MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate
FROM C2
GROUP BY Id, [Type], GroupID;