SQL:合并日期范围

时间:2012-07-25 15:28:55

标签: sql date intervals range

我有一张表,其中描述了业务工作日历的工作片段:(日期格式为24小时格式)

PK  | STARTDATE          | ENDDATE
__________________________________________
1   | 2012/07/21 02:00   | 2012/07/21 04:00
2   | 2012/07/21 03:00   | 2012/07/21 10:00
3   | 2012/07/21 06:00   | 2012/07/21 17:00
4   | 2012/07/21 18:00   | 2012/07/21 19:00

现在,我想合并日期范围(在给定的开始日期和结束日期内),如下所示:

PK  | STARTDATE          | ENDDATE
__________________________________________
1   | 2012/07/21 02:00   | 2012/07/21 17:00
2   | 2012/07/21 18:00   | 2012/07/21 19:00

有没有办法只用标准SQL(例如SQL-92)做到这一点?如果可以,那么其他操作又该如何实现(例如,如果我想求出当天没有被任何工作片段覆盖的时段,结果应该是):

PK  | STARTDATE          | ENDDATE
__________________________________________
1   | 2012/07/21 00:00   | 2012/07/21 02:00
2   | 2012/07/21 19:00   | 2012/07/22 00:00

4 个答案:

答案 0 :(得分:5)

以下是使用SQL Server语法的示例。首先,它找出“头”行,即开始时间没有落在任何其他行的范围之内的行。然后,为了确定每个“头”所覆盖的最后一个“子”行,它在开始时间早于下一个“头”的所有行中取最大的结束时间。SQL如下:

-- Merges overlapping rows of YourTable into maximal ranges (SQL Server syntax).
-- A "head" is a row whose StartDate does not fall strictly inside any other
-- row's range, i.e. a row that opens a merged range. Because both comparisons
-- are strict, ranges that merely touch (prev.EndDate = head.StartDate) are
-- left unmerged.
; with  heads as
        (
        -- Only StartDate is projected: "select *" next to the computed PK would
        -- give the CTE two columns named PK whenever YourTable has its own PK
        -- column, and SQL Server rejects a CTE with duplicate column names.
        select  row_number() over (order by head.StartDate) as PK
        ,       head.StartDate
        from    YourTable head
        where   not exists 
                (
                select  *
                from    YourTable prev
                where   prev.StartDate < head.StartDate
                        and head.StartDate < prev.EndDate
                )
        )
select  row_number() over (order by h.StartDate) as PK
,       h.StartDate
,       max(yt.EndDate) as EndDate
from    heads h
-- nh is the next head in StartDate order (no match for the last head).
left join
        heads nh
on      nh.PK = h.PK + 1
-- yt is every row that starts at or after this head but before the next head;
-- the latest EndDate among those rows closes the merged range.
left join
        YourTable yt
on      h.StartDate <= yt.StartDate
        and (yt.StartDate < nh.StartDate or nh.StartDate is null)
group by
        h.StartDate

Live example at SQL Fiddle.

答案 1 :(得分:3)

这个查询应该可以在大多数支持SQL-92的DBMS上运行,其中没有用到任何高级SQL语法。

性能可能不太好,因为它必须访问(连接)同一张表4次。如果可以使用特定于DBMS的语法,您可能会获得更好的性能。

-- Merges overlapping ranges of dbo.Dates using only SQL-92 constructs
-- (correlated subqueries, NOT EXISTS, DISTINCT).
-- All overlap comparisons are strict, so ranges that merely touch
-- (one row's EndDate equals another row's StartDate) are not merged.
SELECT DISTINCT  -- several rows can open a merged range at the same StartDate
  D.StartDate,
  (
    -- The merged range ends at the earliest "closing" EndDate at or after D's
    -- own EndDate. An EndDate is closing when no row strictly spans it,
    -- i.e. no row starts before it and ends after it.
    SELECT MIN(E.EndDate)
    FROM dbo.Dates E
    WHERE
      E.EndDate >= D.EndDate
      AND NOT EXISTS (
        SELECT *
        FROM dbo.Dates E2
        WHERE
          E2.StartDate < E.EndDate
          AND E2.EndDate > E.EndDate
      )
  ) AS EndDate
FROM
  dbo.Dates D
WHERE
  -- D opens a merged range only when no row that starts earlier is still
  -- running at D.StartDate. Testing D's StartDate (rather than looking for
  -- another row's EndDate inside D) also handles a short range nested inside
  -- a longer one.
  NOT EXISTS (
    SELECT *
    FROM dbo.Dates D2
    WHERE
      D2.StartDate < D.StartDate
      AND D2.EndDate > D.StartDate
  );

同一个查询在几种不同RDBMS中的运行情况,请参见SQL Fiddle。

更新

这是一个仍然不使用递归的新查询,并且只扫描一次表。它确实包含两次排序,这是查询中开销最大的部分(在这个只有几行数据的示例中占成本的88%)。不过,不要低估读取次数少带来的好处……有时这样的查询性能会非常出色。

-- Merges overlapping time ranges in a single pass over the data (SQL Server 2012+).
-- Ranges that touch (previous end = next start) are merged, because a new
-- range only starts when the previous end is strictly earlier.
WITH Data AS (
   -- Inline sample rows; replace this CTE with the real table.
   SELECT
      StartDate = Convert(datetime, StartDate),
      EndDate = Convert(datetime, EndDate)
   FROM (VALUES
      ('02:00', '04:00'), ('03:00', '10:00'), (' 09:00', '12:00'), (' 11:00', '17:00'), (' 18:00', '19:00')
   ) D (StartDate, EndDate)
), LeadLag AS (
   SELECT
      -- Position of the row in start order; used later to pair starts with ends.
      RowNum = Row_Number() OVER (ORDER BY StartDate, EndDate),
      -- Latest end among ALL earlier rows. Looking only at the immediately
      -- preceding row (Lag) would miss a long range that contains a short one.
      PrevEndDate = Coalesce(
         Max(EndDate) OVER (ORDER BY StartDate, EndDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
         Convert(datetime2, '00010101')
      ),
      -- Latest end among all rows up to and including this one.
      RunningEndDate = Max(EndDate) OVER (ORDER BY StartDate, EndDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
      NextStartDate = Coalesce(Lead(StartDate) OVER (ORDER BY StartDate, EndDate), Convert(datetime2, '99991231')),
      StartDate,
      EndDate
   FROM Data
), Dates AS (
   -- Keep only the rows that open a merged range (StartDate set) and/or
   -- close one (EndDate set); a single row may do both.
   SELECT
      L.RowNum,
      X.StartDate,
      X.EndDate
   FROM
      LeadLag L
      CROSS APPLY (
         SELECT
            StartDate = CASE WHEN L.PrevEndDate < L.StartDate THEN L.StartDate ELSE NULL END,
            EndDate = CASE WHEN L.RunningEndDate < L.NextStartDate THEN L.RunningEndDate ELSE NULL END
      ) X
   WHERE
      X.StartDate IS NOT NULL
      OR X.EndDate IS NOT NULL
), Final AS (
   SELECT
      StartDate,
      -- First closing end at or after this row, in row order. Ordering by
      -- EndDate instead would sort every start-only row (NULL EndDate) first
      -- and hand each of them the globally smallest end.
      EndDate = Min(EndDate) OVER (ORDER BY RowNum ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
   FROM Dates
)
SELECT
   StartDate,
   EndDate
FROM Final
WHERE StartDate IS NOT NULL
;

答案 2 :(得分:0)

这是我的解决方案。

-- Demo script (SQL Server): merges overlapping date ranges held in a temp table
-- by walking chains of overlapping rows with a recursive CTE.
IF OBJECT_ID('tempdb..#tblDates') IS NOT NULL
    DROP TABLE #tblDates

CREATE TABLE #tblDates (AutoId INT IDENTITY, StartDate DATE, EndDate DATE)

INSERT #tblDates (StartDate, EndDate) SELECT '2014-11-02', '2014-11-08'
INSERT #tblDates (StartDate, EndDate) SELECT '2014-11-07', '2014-11-10'
INSERT #tblDates (StartDate, EndDate) SELECT '2014-11-06', '2014-11-12'

INSERT #tblDates (StartDate, EndDate) SELECT '2014-11-02', '2014-11-15'

INSERT #tblDates (StartDate, EndDate) SELECT '2014-12-10', '2014-12-13'
INSERT #tblDates (StartDate, EndDate) SELECT '2014-12-12', '2014-12-15'
INSERT #tblDates (StartDate, EndDate) SELECT '2014-12-14', '2014-12-16'


-- The four DELETE steps below only shrink the input of the recursive query;
-- each one can be skipped independently.

-- Optional / Remove the duplicated records of same StartDate and EndDate
DELETE FROM #tblDates WHERE AutoId NOT IN (SELECT MAX(AutoId) FROM #tblDates GROUP BY StartDate, EndDate)

-- Optional / Get only the record with max EndDate grouped by StartDate, Remove Others
DELETE  d1
FROM    #tblDates d1
        JOIN (SELECT x.StartDate, MAX(x.EndDate) MAXEndDate FROM #tblDates x GROUP BY x.StartDate) d2 ON d2.StartDate = d1.StartDate AND d2.MAXEndDate != d1.EndDate

-- Optional / Get only the record with min StartDate grouped by EndDate, Remove Others
DELETE  d1
FROM    #tblDates d1
        JOIN (SELECT x.EndDate, MIN(x.StartDate) MINStartDate FROM #tblDates x GROUP BY x.EndDate) d2 ON d2.EndDate = d1.EndDate AND d2.MINStartDate != d1.StartDate

-- Optional / Remove the overlapping ranges of relevant StartDate and EndDate
-- (rows fully contained in another row). The last condition is a tie-break:
-- two identical ranges contain each other, so without it both copies would be
-- deleted when the de-duplication step above is skipped. With it, the copy
-- with the highest AutoId survives, matching the de-duplication step.
DELETE  c
FROM    #tblDates p
        JOIN #tblDates c ON c.AutoId != p.AutoId AND c.StartDate BETWEEN p.StartDate AND p.EndDate AND c.EndDate BETWEEN p.StartDate AND p.EndDate
                        AND (c.StartDate != p.StartDate OR c.EndDate != p.EndDate OR c.AutoId < p.AutoId)


;WITH Ranges
AS
(
    -- Anchor: rows whose StartDate is not covered by a row that starts earlier;
    -- these open the merged ranges.
    SELECT  s.StartDate, s.EndDate
    FROM    #tblDates s
            LEFT JOIN #tblDates a ON a.AutoId != s.AutoId AND s.StartDate BETWEEN a.StartDate AND a.EndDate AND s.StartDate != a.StartDate
    WHERE   a.AutoId IS NULL
    UNION ALL
    -- Step: extend the range with any row that covers its current EndDate and
    -- ends later. EndDate strictly increases at every level, so the recursion
    -- always terminates.
    SELECT  r.StartDate, d.EndDate
    FROM    Ranges r
            JOIN #tblDates d ON r.EndDate != d.EndDate AND r.EndDate BETWEEN d.StartDate AND d.EndDate
)

-- MAXRECURSION 0 lifts the default limit of 100 levels, which a chain of more
-- than 100 successively overlapping rows would otherwise hit and abort on.
SELECT StartDate, MAX(EndDate) EndDate FROM Ranges GROUP BY StartDate
OPTION (MAXRECURSION 0);

答案 3 :(得分:0)

基于ErikE回复:

-- Demo script (SQL Server 2012+): merges overlapping date periods.
-- The fixture table is qualified with dbo. everywhere so that the existence
-- check, the drop and the create all refer to the same object.
IF(object_id('dbo.Periods') is not null)
    drop table dbo.Periods

go
create table dbo.Periods (
    StartDate date not null,
    EndDate date not null
)
go
insert into dbo.Periods(StartDate,EndDate)
select '1980-01-01','1980-01-10' union all
select '1980-01-03','1980-01-07' union all

select '2000-01-01','2000-01-10' union all
select '2000-01-05','2000-01-30' union all
select '2000-01-12','2000-01-20' union all

select '2021-01-01','2021-01-01'
go

; with LeadLag AS (
   SELECT     
     -- EndDate is a tie-break so that rows sharing a StartDate are numbered
     -- deterministically.
     rownum = row_number() OVER( ORDER BY p.StartDate, p.EndDate),
     -- Latest end among ALL earlier rows. Lag(EndDate) would only see the
     -- immediately preceding row and so miss a long period that contains a
     -- short one, starting a new group too early.
     PrevEndDate = Coalesce(
        Max(p.EndDate) OVER (ORDER BY p.StartDate, p.EndDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
        Convert(datetime2, '0001-01-01')), 
     p.StartDate,
     p.EndDate
   FROM dbo.Periods p
), Dates AS (
   -- StartDate is kept only on rows that open a merged period, NULL elsewhere.
   SELECT
        StartDate = CASE WHEN PrevEndDate < StartDate THEN StartDate ELSE NULL END,
        EndDate,           
        rownum
   FROM   LeadLag
), startGrouping AS (
   -- Carry the most recent opening StartDate forward onto every following row.
   -- The running max works because opening dates increase with rownum.
   SELECT
      StartDate =  max(StartDate) OVER (ORDER BY rownum rows UNBOUNDED PRECEDING),
      EndDate,
      rownum
   FROM Dates
),
 groups AS (
   -- Rank the rows of each merged period so the latest EndDate comes first.
   SELECT
      StartDate,
      EndDate,
      rownum,
      ingroupRownum = row_number() OVER(PARTITION BY StartDate ORDER BY EndDate desc)
   FROM startGrouping e1
)
SELECT StartDate, EndDate
from groups
WHERE  ingroupRownum = 1
ORDER BY StartDate