我有一张表,其中描述了业务工作日历的工作片段:(日期格式为24小时格式)
PK | STARTDATE | ENDDATE
__________________________________________
1 | 2012/07/21 02:00 | 2012/07/21 04:00
2 | 2012/07/21 03:00 | 2012/07/21 10:00
3 | 2012/07/21 06:00 | 2012/07/21 17:00
4 | 2012/07/21 18:00 | 2012/07/21 19:00
现在,我想合并日期范围(在给定的开始日期和结束日期内),如下所示:
PK | STARTDATE | ENDDATE
__________________________________________
1 | 2012/07/21 02:00 | 2012/07/21 17:00
2 | 2012/07/21 18:00 | 2012/07/21 19:00
有没有办法只用标准 SQL(SQL-92)做到这一点?如果可以,其他操作又该怎么做(例如,如果我想做反向合并,即求出未被任何片段覆盖的时间段,结果应该是):
PK | STARTDATE | ENDDATE
__________________________________________
1 | 2012/07/21 00:00 | 2012/07/21 02:00
2 | 2012/07/21 19:00 | 2012/07/22 00:00
答案 0 :(得分:5)
以下是使用 SQL Server 语法的示例。首先,它找出“头”行,即不与任何更早开始的行重叠的行。然后,为了确定每个“头”所在范围的结束时间,它会查找开始时间不早于该“头”且早于下一个“头”的所有行,并取其中最大的结束时间。SQL 如下:
-- Merge overlapping StartDate..EndDate rows of YourTable into islands
-- (SQL Server syntax). Returns one row per island: PK (1..n in StartDate
-- order), the island's StartDate, and the latest EndDate found in it.
-- Ranges that merely touch (prev.EndDate = head.StartDate) are NOT merged,
-- because the overlap test below uses strict inequalities.
; with heads as
(
-- A "head" is a row that no earlier-starting row overlaps, i.e. the first
-- row of an island. Only StartDate is projected: "select *" would also pull
-- in YourTable's own columns, and a table that already has a PK column (as
-- the one in the question does) would give the CTE two columns named PK.
select  row_number() over (order by head.StartDate) as PK
,       head.StartDate
from    YourTable head
where   not exists
        (
        select  *
        from    YourTable prev
        where   prev.StartDate < head.StartDate
                and head.StartDate < prev.EndDate
        )
)
select  row_number() over (order by h.StartDate) as PK
,       h.StartDate
,       max(yt.EndDate) as EndDate
from    heads h
-- nh is the next head in StartDate order; it is NULL for the last island.
left join
        heads nh
on      nh.PK = h.PK + 1
-- Every row that starts at or after this head and before the next head
-- belongs to this island.
left join
        YourTable yt
on      h.StartDate <= yt.StartDate
        and (yt.StartDate < nh.StartDate or nh.StartDate is null)
group by
        h.StartDate
答案 1 :(得分:3)
这应该适用于大多数支持 SQL-92 的 DBMS。这里没有用到任何高级 SQL 语法。
性能可能不太好,因为它必须把同一个表连接(引用)4 次。如果可以使用特定 DBMS 的语法,您可能会获得更好的性能。
-- Merge overlapping date ranges in dbo.Dates using only SQL-92 constructs
-- (correlated subqueries, NOT EXISTS, MIN); no CTEs or window functions.
-- Output: one (StartDate, EndDate) row per merged range.
-- All comparisons are strict, so ranges that only touch (one row's EndDate
-- equals another row's StartDate) are kept as separate ranges.
-- DISTINCT collapses the duplicates produced when several rows share the
-- StartDate of the same merged range.
SELECT DISTINCT
    D.StartDate,
    (
        -- End of the merged range that D starts: the earliest "range end"
        -- located at or after D.EndDate.
        SELECT MIN(E.EndDate)
        FROM dbo.Dates E
        WHERE
            E.EndDate >= D.EndDate
            -- E closes a merged range when no other row covers E.EndDate
            -- and continues past it. (Testing whether another row merely
            -- starts inside E gives wrong answers for nested ranges.)
            AND NOT EXISTS (
                SELECT *
                FROM dbo.Dates E2
                WHERE
                    E2.StartDate < E.EndDate
                    AND E2.EndDate > E.EndDate
            )
    ) EndDate
FROM
    dbo.Dates D
WHERE
    -- D opens a merged range when no earlier-starting row covers
    -- D.StartDate. (Testing whether another row ends inside D would drop
    -- the outer row of a nested pair such as 01:00-10:00 / 02:00-03:00.)
    NOT EXISTS (
        SELECT *
        FROM dbo.Dates D2
        WHERE
            D2.StartDate < D.StartDate
            AND D2.EndDate > D.StartDate
    );
关于同一查询在几种不同 RDBMS 中的运行示例,请参阅 SQL Fiddle(原文链接在此处缺失)。
下面是一个新的查询,它同样不使用递归,并且只扫描一次表。它确实包含两次排序,这是查询中开销最大的部分(在这个只有几行数据的示例中占成本的 88%)。但是,不要低估读取次数少带来的好处……有时这样的查询可能会变得非常棘手。
-- Merge overlapping time ranges with window functions (SQL Server 2012+),
-- without recursion. Output: one (StartDate, EndDate) row per merged range.
-- A row continues the current range when its StartDate is at or before the
-- latest EndDate seen so far, so ranges that touch are merged.
WITH Data AS (
    -- Inline sample data; the time-only strings are converted to datetime.
    SELECT
        StartDate = Convert(datetime, StartDate),
        EndDate = Convert(datetime, EndDate)
    FROM (VALUES
        ('02:00', '04:00'), ('03:00', '10:00'), (' 09:00', '12:00'), (' 11:00', '17:00'), (' 18:00', '19:00')
    ) D (StartDate, EndDate)
), Covered AS (
    -- Latest EndDate among ALL earlier rows, not just the previous one:
    -- Lag(EndDate) alone breaks when a short range is nested inside a long
    -- one, because the row after the nested range would only be compared
    -- with the nested range's early EndDate.
    SELECT
        StartDate,
        EndDate,
        PrevMaxEndDate = Max(EndDate) OVER (
            ORDER BY StartDate, EndDate
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        )
    FROM Data
), Islands AS (
    -- Running count of range starts gives every row its island number.
    -- The first row has PrevMaxEndDate = NULL and falls into ELSE (a start).
    SELECT
        StartDate,
        EndDate,
        IslandId = Sum(CASE WHEN PrevMaxEndDate >= StartDate THEN 0 ELSE 1 END) OVER (
            ORDER BY StartDate, EndDate
            ROWS UNBOUNDED PRECEDING
        )
    FROM Covered
)
SELECT
    StartDate = Min(StartDate),
    EndDate = Max(EndDate)
FROM Islands
GROUP BY IslandId
ORDER BY Min(StartDate)
;
答案 2 :(得分:0)
这是我的解决方案。
-- Demo script (SQL Server): merge overlapping date ranges held in a temp
-- table by walking chains of overlapping rows with a recursive CTE.

-- Drop any leftover copy so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#tblDates') IS NOT NULL
    DROP TABLE #tblDates;

CREATE TABLE #tblDates (AutoId INT IDENTITY, StartDate DATE, EndDate DATE);

-- Sample ranges, inserted one row at a time.
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-02', '2014-11-08');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-07', '2014-11-10');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-06', '2014-11-12');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-11-02', '2014-11-15');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-10', '2014-12-13');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-12', '2014-12-15');
INSERT INTO #tblDates (StartDate, EndDate) VALUES ('2014-12-14', '2014-12-16');

-- Optional: drop exact duplicates, keeping the highest AutoId of each
-- (StartDate, EndDate) pair.
DELETE FROM #tblDates
WHERE AutoId NOT IN (
    SELECT MAX(AutoId)
    FROM #tblDates
    GROUP BY StartDate, EndDate
);

-- Optional: among rows sharing a StartDate, keep only the one with the
-- latest EndDate.
DELETE victim
FROM #tblDates AS victim
INNER JOIN (
    SELECT g.StartDate, MAX(g.EndDate) AS MAXEndDate
    FROM #tblDates AS g
    GROUP BY g.StartDate
) AS widest
    ON widest.StartDate = victim.StartDate
   AND widest.MAXEndDate <> victim.EndDate;

-- Optional: among rows sharing an EndDate, keep only the one with the
-- earliest StartDate.
DELETE victim
FROM #tblDates AS victim
INNER JOIN (
    SELECT g.EndDate, MIN(g.StartDate) AS MINStartDate
    FROM #tblDates AS g
    GROUP BY g.EndDate
) AS widest
    ON widest.EndDate = victim.EndDate
   AND widest.MINStartDate <> victim.StartDate;

-- Optional: drop every row that lies entirely inside another row.
DELETE inner_rng
FROM #tblDates AS outer_rng
INNER JOIN #tblDates AS inner_rng
    ON inner_rng.AutoId <> outer_rng.AutoId
   AND inner_rng.StartDate BETWEEN outer_rng.StartDate AND outer_rng.EndDate
   AND inner_rng.EndDate BETWEEN outer_rng.StartDate AND outer_rng.EndDate;

WITH Ranges AS
(
    -- Anchor: rows whose StartDate is not covered by any other row that
    -- starts on a different date.
    SELECT head.StartDate, head.EndDate
    FROM #tblDates AS head
    WHERE NOT EXISTS (
        SELECT 1
        FROM #tblDates AS other
        WHERE other.AutoId <> head.AutoId
          AND head.StartDate BETWEEN other.StartDate AND other.EndDate
          AND head.StartDate <> other.StartDate
    )

    UNION ALL

    -- Recursive step: extend a range through any row that covers its
    -- current EndDate and has a different EndDate.
    SELECT grown.StartDate, nxt.EndDate
    FROM Ranges AS grown
    INNER JOIN #tblDates AS nxt
        ON grown.EndDate <> nxt.EndDate
       AND grown.EndDate BETWEEN nxt.StartDate AND nxt.EndDate
)
-- Each anchor appears once per extension step; keep its furthest EndDate.
SELECT StartDate, MAX(EndDate) AS EndDate
FROM Ranges
GROUP BY StartDate;
答案 3 :(得分:0)
基于 ErikE 的回答:
-- Rebuild the sample table dbo.Periods and load the demo ranges.
-- The table is schema-qualified everywhere so the existence check, the
-- DROP, the CREATE and the INSERT all address the same object; with an
-- unqualified name they can diverge when the caller's default schema is
-- not dbo, and a re-run then fails on CREATE.
IF(object_id('dbo.Periods') is not null)
drop table dbo.Periods
go
create table dbo.Periods (
StartDate date not null,
EndDate date not null
)
go
-- Sample data: a nested pair in 1980, an overlapping chain in 2000, and a
-- single-day period in 2021.
insert into dbo.Periods(StartDate,EndDate)
select '1980-01-01','1980-01-10' union all
select '1980-01-03','1980-01-07' union all
select '2000-01-01','2000-01-10' union all
select '2000-01-05','2000-01-30' union all
select '2000-01-12','2000-01-20' union all
select '2021-01-01','2021-01-01'
go
-- Merge overlapping rows of Periods into islands (SQL Server 2012+).
-- Output: one (StartDate, EndDate) row per island. A row continues the
-- current island when its StartDate is at or before the latest EndDate
-- seen so far, so periods that touch are merged.
; with Ordered AS (
    -- PrevMaxEndDate is the latest EndDate among ALL earlier rows.
    -- Lag(EndDate) would only look at the immediately preceding row and
    -- would wrongly open a new island after a short period nested inside
    -- a longer one. EndDate is added to the ordering as a tie-breaker so
    -- both window functions see the same row order.
    SELECT
        rownum = row_number() OVER (ORDER BY p.StartDate, p.EndDate),
        PrevMaxEndDate = max(p.EndDate) OVER (
            ORDER BY p.StartDate, p.EndDate
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ),
        p.StartDate,
        p.EndDate
    FROM Periods p
), Dates AS (
    -- Keep StartDate only on rows that open an island. The first row has
    -- PrevMaxEndDate = NULL and falls into ELSE, so it is always a start.
    SELECT
        StartDate = CASE WHEN PrevMaxEndDate >= StartDate THEN NULL ELSE StartDate END,
        EndDate,
        rownum
    FROM Ordered
), startGrouping AS (
    -- Carry the most recent island start forward to every row of the island
    -- (island starts only increase with rownum, so the running max is the
    -- latest one).
    SELECT
        StartDate = max(StartDate) OVER (ORDER BY rownum ROWS UNBOUNDED PRECEDING),
        EndDate
    FROM Dates
)
-- One row per island, with the latest EndDate reached inside it.
SELECT StartDate, EndDate = max(EndDate)
FROM startGrouping
GROUP BY StartDate