折叠具有相邻开始日期和结束日期的记录

时间:2014-12-30 23:03:25

标签: sql-server tsql

我在SQL Server 2008 R2的表中有以下数据:

 ID        Code    StartDate     EndDate
 10001     3       2014-07-25    2014-07-28
 10001     3       2014-07-29    2014-10-06
 10001     3       2014-10-07    2014-10-10
 10001     1       2014-10-11    2014-10-31
 10001     1       2014-11-01    2014-11-15
 10001     3       2014-11-16    2014-11-25
 10001     3       2014-11-26    NULL
 20002     3       2014-07-25    2014-07-28
 20002     3       2014-07-29    2014-10-06
 20002     3       2014-10-07    NULL
 30003     3       2014-07-25    2014-11-13
 30003     3       2014-11-14    2014-11-24
 30003     2       2014-11-25    NULL

我希望“折叠”所有Code相同、且前一条记录的EndDate与后一条记录的StartDate相邻(相差一天)的记录。结果应该是:

 ID        Code    StartDate     EndDate
 10001     3       2014-07-25    2014-10-10    
 10001     1       2014-10-11    2014-11-15
 10001     3       2014-11-16    NULL
 20002     3       2014-07-25    NULL
 30003     3       2014-07-25    2014-11-24
 30003     2       2014-11-25    NULL

我一直在尝试使用各种子查询和ROW_NUMBER()函数,但始终无法使其工作。我猜想用CTE可以很容易地做到这一点,但我还没弄明白CTE的工作方式,因此不知道该如何在这里使用。有什么想法吗?

4 个答案:

答案 0 :(得分:0)

试试这个,

-- Scratch table for the sample data: one row per (ID, CODE) date range.
-- ENDDATE is NULL for a range that is still open.
CREATE TABLE #TEMP (
    ID        INT,
    CODE      INT,
    STARTDATE DATE,
    ENDDATE   DATE
);

-- Load the 13 sample ranges for IDs 10001, 20002 and 30003.
INSERT INTO #TEMP (ID, CODE, STARTDATE, ENDDATE)
VALUES
    (10001, 3, '2014-07-25', '2014-07-28'),
    (10001, 3, '2014-07-29', '2014-10-06'),
    (10001, 3, '2014-10-07', '2014-10-10'),
    (10001, 1, '2014-10-11', '2014-10-31'),
    (10001, 1, '2014-11-01', '2014-11-15'),
    (10001, 3, '2014-11-16', '2014-11-25'),
    (10001, 3, '2014-11-26', NULL),
    (20002, 3, '2014-07-25', '2014-07-28'),
    (20002, 3, '2014-07-29', '2014-10-06'),
    (20002, 3, '2014-10-07', NULL),
    (30003, 3, '2014-07-25', '2014-11-13'),
    (30003, 3, '2014-11-14', '2014-11-24'),
    (30003, 2, '2014-11-25', NULL);


-- Collapse consecutive #TEMP rows that share ID and CODE into one range.
--
-- Technique: within each ID, the difference between a row's position among
-- all rows and its position among rows of the same CODE stays constant for
-- as long as the CODE does not change, so (ID, CODE, ISLAND) names one run.
--
-- Only ROW_NUMBER() and a partitioned MIN() are used, so the query runs on
-- SQL Server 2008 R2 (LAG/LEAD require SQL Server 2012 or later).
--
-- NOTE(review): date adjacency is not verified here; this assumes the ranges
-- of one ID are contiguous and non-overlapping, as in the sample data.
;WITH NUMBERED AS
(
    SELECT ID,
           CODE,
           STARTDATE,
           ENDDATE,
           ROW_NUMBER() OVER (PARTITION BY ID       ORDER BY STARTDATE, ENDDATE)
         - ROW_NUMBER() OVER (PARTITION BY ID, CODE ORDER BY STARTDATE, ENDDATE) AS ISLAND
    FROM   #TEMP
),
RANKED AS
(
    SELECT ID,
           CODE,
           ENDDATE,
           -- Earliest start of the run becomes the collapsed STARTDATE.
           MIN(STARTDATE) OVER (PARTITION BY ID, CODE, ISLAND) AS ISLAND_START,
           -- T_R = 1 marks the chronologically last row of the run; its
           -- ENDDATE (possibly NULL for an open range) closes the run.
           ROW_NUMBER() OVER (PARTITION BY ID, CODE, ISLAND
                              ORDER BY STARTDATE DESC, ENDDATE DESC) AS T_R
    FROM   NUMBERED
)
SELECT ID,
       CODE,
       ISLAND_START AS STARTDATE,
       ENDDATE
FROM   RANKED
WHERE  T_R = 1
ORDER BY ID, ISLAND_START;

答案 1 :(得分:0)

试试这个(我在SQL Server 2008中使用Row_Number()来模拟SQL Server 2012的Lead和Lag功能):

SQL Fiddle

MS SQL Server 2008架构设置

-- Sample table: one row per (ID, Code) date range.
-- EndDate is nullable; NULL marks a range that is still open.
CREATE TABLE EventLog (
    ID        INT,
    Code      TINYINT,
    StartDate DATE,
    EndDate   DATE NULL
);

-- Load the 13 sample ranges for IDs 10001, 20002 and 30003.
INSERT INTO EventLog (ID, Code, StartDate, EndDate)
VALUES
    (10001, 3, '2014-07-25', '2014-07-28'),
    (10001, 3, '2014-07-29', '2014-10-06'),
    (10001, 3, '2014-10-07', '2014-10-10'),
    (10001, 1, '2014-10-11', '2014-10-31'),
    (10001, 1, '2014-11-01', '2014-11-15'),
    (10001, 3, '2014-11-16', '2014-11-25'),
    (10001, 3, '2014-11-26', NULL),
    (20002, 3, '2014-07-25', '2014-07-28'),
    (20002, 3, '2014-07-29', '2014-10-06'),
    (20002, 3, '2014-10-07', NULL),
    (30003, 3, '2014-07-25', '2014-11-13'),
    (30003, 3, '2014-11-14', '2014-11-24'),
    (30003, 2, '2014-11-25', NULL);

查询1

-- Collapse EventLog rows that share ID and Code and whose ranges touch,
-- i.e. the earlier row's EndDate is exactly one day before the later row's
-- StartDate.
--
-- Written for SQL Server 2008: the previous/next row lookups that LAG/LEAD
-- would provide are emulated by self-joining on ROW_NUMBER().
;WITH CTE AS
(
    -- Number the rows of each (ID, Code) group chronologically.
    SELECT ID,
           Code,
           StartDate,
           EndDate,
           ROW_NUMBER() OVER (PARTITION BY ID, Code ORDER BY StartDate) AS RN
    FROM EventLog
),
CTE2 AS
(
    -- PrevEndDate / NextStartDate are filled in only when the neighbouring
    -- row of the same (ID, Code) is adjacent by exactly one day; a NULL
    -- therefore marks an island boundary on that side. A NULL EndDate makes
    -- DATEDIFF return NULL, so an open-ended row always counts as an end.
    SELECT Cur.ID,
           Cur.Code,
           Cur.StartDate,
           Cur.EndDate,
           CASE WHEN DATEDIFF(DAY, PrevRow.EndDate, Cur.StartDate) = 1
                THEN PrevRow.EndDate
           END AS PrevEndDate,
           CASE WHEN DATEDIFF(DAY, Cur.EndDate, NextRow.StartDate) = 1
                THEN NextRow.StartDate
           END AS NextStartDate
    FROM CTE AS Cur
    LEFT JOIN CTE AS PrevRow
        ON  PrevRow.ID   = Cur.ID
        AND PrevRow.Code = Cur.Code
        AND PrevRow.RN   = Cur.RN - 1
    LEFT JOIN CTE AS NextRow
        ON  NextRow.ID   = Cur.ID
        AND NextRow.Code = Cur.Code
        AND NextRow.RN   = Cur.RN + 1
),
StartAndEnd AS
(
    -- Keep only island boundaries (first and/or last row of an island) and
    -- renumber them so that a multi-row island's start row is immediately
    -- followed by its end row.
    SELECT ID,
           Code,
           StartDate,
           EndDate,
           PrevEndDate,
           NextStartDate,
           ROW_NUMBER() OVER (PARTITION BY ID, Code ORDER BY StartDate) AS RN
    FROM CTE2
    WHERE PrevEndDate IS NULL
       OR NextStartDate IS NULL
)
SELECT S.ID,
       S.Code,
       S.StartDate,
       -- A single-row island is its own end row: take its EndDate directly.
       -- Looking at RN + 1 in that case would pick up the next island's row.
       CASE WHEN S.NextStartDate IS NULL
            THEN S.EndDate
            ELSE E.EndDate
       END AS EndDate
FROM StartAndEnd AS S
LEFT JOIN StartAndEnd AS E
    ON  E.ID   = S.ID
    AND E.Code = S.Code
    AND E.RN   = S.RN + 1
WHERE S.PrevEndDate IS NULL
ORDER BY S.ID, S.StartDate;

**结果**

|    ID | CODE |  STARTDATE |    ENDDATE |
|-------|------|------------|------------|
| 10001 |    3 | 2014-07-25 | 2014-10-10 |
| 10001 |    1 | 2014-10-11 | 2014-11-15 |
| 10001 |    3 | 2014-11-16 |     (null) |
| 20002 |    3 | 2014-07-25 |     (null) |
| 30003 |    3 | 2014-07-25 | 2014-11-24 |
| 30003 |    2 | 2014-11-25 |     (null) |

答案 2 :(得分:0)

由于您的范围是连续的,因此问题基本上变为一个“间隙与孤岛”(gaps-and-islands)问题(© Andriy M)。

感谢Steve Ford提供的表定义和示例数据

-- Table variable with the sample data: one row per (Id, Code) date range.
-- EndDate is nullable; NULL marks a range that is still open.
DECLARE @EventLog TABLE
(
    Id        INT,
    Code      TINYINT,
    StartDate DATE,
    EndDate   DATE NULL
);

-- Load the 13 sample ranges for Ids 10001, 20002 and 30003.
INSERT INTO @EventLog (Id, Code, StartDate, EndDate)
VALUES
    (10001, 3, '2014-07-25', '2014-07-28'),
    (10001, 3, '2014-07-29', '2014-10-06'),
    (10001, 3, '2014-10-07', '2014-10-10'),
    (10001, 1, '2014-10-11', '2014-10-31'),
    (10001, 1, '2014-11-01', '2014-11-15'),
    (10001, 3, '2014-11-16', '2014-11-25'),
    (10001, 3, '2014-11-26', NULL),
    (20002, 3, '2014-07-25', '2014-07-28'),
    (20002, 3, '2014-07-29', '2014-10-06'),
    (20002, 3, '2014-10-07', NULL),
    (30003, 3, '2014-07-25', '2014-11-13'),
    (30003, 3, '2014-11-14', '2014-11-24'),
    (30003, 2, '2014-11-25', NULL);

感谢Andriy M提供的解决方案

-- Sentinel that stands in for a NULL (open-ended) EndDate during MAX(),
-- so that an open range wins the aggregate instead of being ignored.
DECLARE @MaxDate DATE = '9999-12-31';

-- Expects the table variable @EventLog (Id, Code, StartDate, EndDate) to be
-- declared and populated earlier in the same batch.
WITH numbered AS
(
    -- g is constant across consecutive rows of one Id that keep the same
    -- Code: both row numbers advance together until the Code changes.
    SELECT
        ev.Id,
        ev.Code,
        ev.StartDate,
        ev.EndDate,
        ROW_NUMBER() OVER (PARTITION BY ev.Id          ORDER BY ev.StartDate)
      - ROW_NUMBER() OVER (PARTITION BY ev.Id, ev.Code ORDER BY ev.StartDate) AS g
    FROM @EventLog AS ev
)
SELECT
    n.Id,
    n.Code,
    MIN(n.StartDate) AS StartDate,
    -- Map the sentinel back to NULL once the group maximum is known.
    NULLIF(MAX(COALESCE(n.EndDate, @MaxDate)), @MaxDate) AS EndDate
FROM numbered AS n
GROUP BY
    n.Id,
    n.Code,
    n.g;

答案 3 :(得分:0)

-- Collapse @EventLog rows that share ID and Code and whose ranges touch
-- (EndDate of one row is the day before StartDate of the next).
--
-- Expects the table variable @EventLog (Id, Code, StartDate, EndDate) to be
-- declared and populated earlier in the same batch.
--
-- Approach: find every row that starts an island and every row that ends
-- one, number both sets chronologically per (ID, Code), and pair the k-th
-- start with the k-th end.
-- NOTE(review): the pairing assumes ranges of one (ID, Code) do not overlap,
-- so each island has exactly one start row and one end row.
;WITH StartDates AS
(
    -- A row starts an island when no row of the SAME ID and Code ends on the
    -- day before it starts.
    SELECT e1.ID,
           e1.Code,
           e1.StartDate,
           ROW_NUMBER() OVER (PARTITION BY e1.ID, e1.Code
                              ORDER BY e1.StartDate) AS RowNumber
    FROM @EventLog AS e1
    WHERE NOT EXISTS (
        SELECT 1
        FROM @EventLog AS e2
        WHERE e2.ID      = e1.ID
          AND e2.Code    = e1.Code
          AND e2.EndDate = DATEADD(DAY, -1, e1.StartDate)
    )
),
EndDates AS
(
    -- A row ends an island when no row of the same ID and Code starts on the
    -- day after it ends. For a NULL EndDate the DATEADD result is NULL, the
    -- comparison never matches, and the open-ended row counts as an end.
    -- Ordering by StartDate (not EndDate) keeps the numbering chronological
    -- even when EndDate is NULL.
    SELECT e1.ID,
           e1.Code,
           e1.EndDate,
           ROW_NUMBER() OVER (PARTITION BY e1.ID, e1.Code
                              ORDER BY e1.StartDate) AS RowNumber
    FROM @EventLog AS e1
    WHERE NOT EXISTS (
        SELECT 1
        FROM @EventLog AS e2
        WHERE e2.ID        = e1.ID
          AND e2.Code      = e1.Code
          AND e2.StartDate = DATEADD(DAY, 1, e1.EndDate)
    )
)
SELECT s.ID,
       s.Code,
       s.StartDate,
       e.EndDate
FROM StartDates AS s
INNER JOIN EndDates AS e
    ON  e.ID        = s.ID
    AND e.Code      = s.Code
    AND e.RowNumber = s.RowNumber
ORDER BY s.ID, s.StartDate;