结合相邻的日期

时间:2017-03-17 00:04:16

标签: sql sql-server sql-server-2008 tsql

我正在努力通过过滤器将连续日期结合起来。

示例:

我的数据集:

enter image description here

我需要的结果:(仅适用于 paid = 3)

enter image description here

架构和数据脚本

-- Sample schema: one row per (work_status, paid) period, bounded by
-- from_date and to_date. Note that paid is stored as text (varchar(2)).
CREATE TABLE dbo.NRFC
(
    work_status int        NOT NULL,
    paid        varchar(2) NOT NULL,
    from_date   date       NOT NULL,
    to_date     date       NOT NULL
);

-- Sample rows. They are deliberately not inserted in date order, and some
-- paid = '3' periods touch (to_date = next from_date) or are one day apart.
INSERT INTO dbo.NRFC (work_status, paid, from_date, to_date)
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000');

到目前为止我的代码:

-- Attempt at merging consecutive paid = 3 rows of NRFC into packed ranges.
--
-- Step 1 (CTE Grouped): for every paid = 3 row, compute a flag "first":
--   1 when the row that immediately precedes it by from_date (looking at ALL
--     rows of NRFC, whatever their paid value) has a different paid, or when
--     there is no preceding row at all;
--   0 otherwise.
-- Step 2 (derived table p): "part" = sum of the "first" flags of all Grouped
--   rows with a later from_date; rows sharing the same part form one group.
-- Step 3: MIN(from_date) / MAX(to_date) per (part, paid).
--
-- NOTE(review): the flag only compares paid with the preceding row. The
-- distance between the preceding to_date and this from_date is never examined,
-- so two paid = 3 rows separated by a date gap, with no row of another paid
-- value starting in between, end up in the same group.
;WITH Grouped (
    from_date
    ,to_date
    ,paid
    ,first
    )
AS (
    SELECT from_date
        ,to_date
        ,paid
        -- Correlated scalar subquery: the unqualified paid / from_date below
        -- resolve to the inner NRFC, w.* refers to the current outer row.
        -- NOTE(review): assumes from_date is unique in NRFC; otherwise the
        -- "from_date = (...)" lookup can return more than one row.
        ,isnull((
                SELECT CASE 
                        WHEN paid <> w.paid
                            THEN 1
                        ELSE 0
                        END
                FROM NRFC
                WHERE from_date = (
                        -- latest from_date strictly before the current row
                        SELECT max(from_date)
                        FROM NRFC
                        WHERE from_date < w.from_date
                        )
                ), 1) AS first
    FROM NRFC w
    -- paid is declared varchar(2) in the CREATE TABLE script, so comparing it
    -- with the integer literal 3 relies on an implicit conversion.
    WHERE paid = 3
    )
SELECT min(from_date) AS from_date
    ,max(to_date) AS to_date
    ,paid
FROM (
    SELECT from_date
        ,to_date
        ,paid
        -- number of group starts found after this row; 0 for the last group
        ,isnull((
                SELECT sum(first)
                FROM grouped
                WHERE from_date > g.from_date
                ), 0) AS part
    FROM grouped g
    ) p
GROUP BY p.part
    ,p.paid
ORDER BY from_date

4 个答案:

答案 0 :(得分:1)

这看起来与Itzik Ben-Gan在他的文章中详细讨论的Packing intervals问题完全相同。

主要思想是:把每个区间的起点标记为 +1,把每个区间的终点标记为 -1。如果这些标记的累计和(running total)大于零,就说明我们正处在一个需要合并(打包)的连续区间之内。

阅读文章并逐步运行下面的查询,cte-by-cte并检查中间结果以了解其工作原理。

示例数据

-- Table variable holding the same sample rows as the question's dbo.NRFC.
-- It only lives for the current batch, so it must be run together with the
-- query that reads it.
DECLARE @NRFC TABLE
(
    work_status int        NOT NULL,
    paid        varchar(2) NOT NULL,
    from_date   date       NOT NULL,
    to_date     date       NOT NULL
);

INSERT INTO @NRFC (work_status, paid, from_date, to_date)
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000')
;

**查询**

该查询按半开区间处理(起点包含、终点不包含),这就是为什么我先给 to_date 加上 1 天,并在最后再把这 1 天减掉。

-- Packs touching / overlapping intervals of @NRFC per (work_status, paid).
-- Each interval becomes a +1 "start" event on from_date and a -1 "end" event
-- on the day after to_date; a packed range starts or ends wherever the number
-- of simultaneously open intervals drops to zero.
WITH
interval_events AS
(
    -- Start events: start_ord numbers the starts inside each partition.
    SELECT
        src.work_status,
        src.paid,
        src.from_date AS event_date,            -- start day is inclusive
        +1 AS delta,
        NULL AS end_ord,
        ROW_NUMBER() OVER (
            PARTITION BY src.work_status, src.paid
            ORDER BY src.from_date
        ) AS start_ord
    FROM @NRFC AS src

    UNION ALL

    -- End events: end_ord numbers the ends inside each partition.
    SELECT
        src.work_status,
        src.paid,
        DATEADD(day, 1, src.to_date) AS event_date,  -- shifted so the end is exclusive
        -1 AS delta,
        ROW_NUMBER() OVER (
            PARTITION BY src.work_status, src.paid
            ORDER BY src.to_date
        ) AS end_ord,
        NULL AS start_ord
    FROM @NRFC AS src
),
numbered_events AS
(
    -- event_ord = position of the event among all events (starts and ends) of
    -- its partition; on equal dates a start is placed before an end.
    SELECT
        ev.work_status,
        ev.paid,
        ev.event_date,
        ev.delta,
        ev.end_ord,
        ev.start_ord,
        ROW_NUMBER() OVER (
            PARTITION BY ev.work_status, ev.paid
            ORDER BY ev.event_date, ev.delta DESC
        ) AS event_ord
    FROM interval_events AS ev
),
range_boundaries AS
(
    -- Keep only the events at which no other interval is open:
    --   start event: start_ord - (event_ord - start_ord) - 1 intervals were
    --                open just before it;
    --   end event:   (event_ord - end_ord) - end_ord intervals remain open
    --                right after it.
    -- The survivors alternate start / end, so consecutive pairs share a
    -- range_no (integer division by 2).
    SELECT
        ne.work_status,
        ne.paid,
        ne.event_date,
        (ROW_NUMBER() OVER (
            PARTITION BY ne.work_status, ne.paid
            ORDER BY ne.event_date
        ) - 1) / 2 + 1 AS range_no
    FROM numbered_events AS ne
    WHERE
        CASE
            WHEN ne.start_ord IS NOT NULL
                THEN ne.start_ord - (ne.event_ord - ne.start_ord) - 1
            ELSE (ne.event_ord - ne.end_ord) - ne.end_ord
        END = 0
)
SELECT
    rb.work_status,
    rb.paid,
    MIN(rb.event_date) AS starttime,
    DATEADD(day, -1, MAX(rb.event_date)) AS endtime  -- undo the one-day shift of the end
FROM range_boundaries AS rb
GROUP BY
    rb.work_status,
    rb.paid,
    rb.range_no
ORDER BY
    rb.work_status,
    rb.paid,
    starttime
;

**结果**

+-------------+------+------------+------------+
| work_status | paid | starttime  |  endtime   |
+-------------+------+------------+------------+
|         101 |    3 | 2009-01-01 | 2009-12-12 |
|         101 |    3 | 2010-01-01 | 2015-01-28 |
|         101 |    3 | 2016-01-01 | 2017-03-15 |
|         101 |    4 | 2015-04-01 | 2015-05-12 |
|         101 |    4 | 2017-03-16 | 2017-03-17 |
|         101 |    5 | 2015-01-29 | 2015-03-31 |
+-------------+------+------------+------------+

答案 1 :(得分:1)

另一种解法:

-- Temporary copy of the question's sample data, used by the query that follows.
CREATE TABLE #tb
(
    work_status int        NOT NULL,
    paid        varchar(2) NOT NULL,
    from_date   date       NOT NULL,
    to_date     date       NOT NULL
);

INSERT INTO #tb (work_status, paid, from_date, to_date)
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000')
-- Packs the rows of #tb into maximal runs of consecutive calendar days per
-- (work_status, paid).
--
-- How it works: every interval is expanded into one row per covered day
-- (duplicates removed). Inside a partition, the dense rank of a day minus its
-- day offset from the partition's first day is constant along a run of
-- consecutive days, so that difference serves as the grouping key.
--
-- The day offsets come from an inline tally instead of master.dbo.spt_values.
-- The type = 'P' rows of that undocumented system table only cover 0..2047,
-- so any interval longer than 2047 days silently lost its trailing days.
-- Seven crossed digit sets give 10,000,000 offsets, more than the number of
-- days the date type can span; TOP stops generation at the length of each
-- interval.
--
-- The leading semicolon is required because the statement preceding this
-- query is not terminated.
;WITH digits AS
(
    SELECT v.d
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v (d)
),
covered_days AS
(
    SELECT DISTINCT
        t.work_status,
        t.paid,
        DATEADD(day, CAST(n.number AS int), t.from_date) AS dt
    FROM #tb AS t
    CROSS APPLY
    (
        -- Offsets 0 .. DATEDIFF(day, from_date, to_date) for this row.
        -- Rows with to_date < from_date yield no days (TOP must not be negative).
        SELECT TOP (CASE
                        WHEN t.from_date <= t.to_date
                            THEN DATEDIFF(day, t.from_date, t.to_date) + 1
                        ELSE 0
                    END)
            r.rn - 1 AS number
        FROM
        (
            SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS rn
            FROM digits AS d1
            CROSS JOIN digits AS d2
            CROSS JOIN digits AS d3
            CROSS JOIN digits AS d4
            CROSS JOIN digits AS d5
            CROSS JOIN digits AS d6
            CROSS JOIN digits AS d7
        ) AS r
        ORDER BY r.rn
    ) AS n
),
ranked_days AS
(
    SELECT
        cd.work_status,
        cd.paid,
        cd.dt,
        DENSE_RANK() OVER (
            PARTITION BY cd.work_status, cd.paid
            ORDER BY cd.dt
        ) AS rn,
        MIN(cd.dt) OVER (
            PARTITION BY cd.work_status, cd.paid
        ) AS first_dt
    FROM covered_days AS cd
)
SELECT
    rd.work_status,
    rd.paid,
    MIN(rd.dt) AS from_date,
    MAX(rd.dt) AS to_date
FROM ranked_days AS rd
GROUP BY
    rd.work_status,
    rd.paid,
    rd.rn - DATEDIFF(day, rd.first_dt, rd.dt)
ORDER BY
    rd.work_status,
    rd.paid,
    MIN(rd.dt);
work_status paid from_date  to_date
----------- ---- ---------- ----------
101         3    2009-01-01 2009-12-12
101         3    2010-01-01 2015-01-28
101         3    2016-01-01 2017-03-15
101         4    2015-04-01 2015-05-12
101         4    2017-03-16 2017-03-17
101         5    2015-01-29 2015-03-31

答案 2 :(得分:0)

使用递归 CTE 可以得到所需的输出。下面的代码只考虑了 paid = 3 的情况,请检查。

-- Merges chains of paid = '3' rows of NRFC whose periods touch
-- (next from_date = to_date) or are exactly one day apart, using a recursive CTE.
--
-- Fixes relative to the original snippet:
--   * the table is named NRFC (it was misspelled NFRC, so the query could not run);
--   * paid is varchar(2), so it is compared with the string '3' instead of the
--     integer 3 (no implicit conversion);
--   * rows are only chained within the same work_status, matching the grouping
--     of the final result;
--   * a joined row must end later than the current range (B.to_date > A.to_date),
--     so a single-day row (from_date = to_date) can no longer join to itself and
--     recurse until the MAXRECURSION limit is hit.
;WITH CTE AS
(
    -- Anchor: every paid = '3' row, already extended by a directly following
    -- row when there is one.
    SELECT
        A.work_status,
        A.paid,
        A.from_date,
        ISNULL(B.to_date, A.to_date) AS to_date
    FROM NRFC AS A
    LEFT JOIN NRFC AS B
        ON  B.work_status = A.work_status
        AND B.paid = A.paid
        AND (B.from_date = A.to_date OR DATEDIFF(day, A.to_date, B.from_date) = 1)
        AND B.to_date > A.to_date
    WHERE A.paid = '3'

    UNION ALL

    -- Recursive step: keep extending the end of the range while a following
    -- row touches it. The paid filter is inherited from the anchor through
    -- the B.paid = A.paid condition.
    SELECT
        A.work_status,
        A.paid,
        A.from_date,
        B.to_date
    FROM CTE AS A
    INNER JOIN NRFC AS B
        ON  B.work_status = A.work_status
        AND B.paid = A.paid
        AND (B.from_date = A.to_date OR DATEDIFF(day, A.to_date, B.from_date) = 1)
        AND B.to_date > A.to_date
)
-- For each start keep the furthest end reached, then for each end keep the
-- earliest start: that leaves exactly one row per merged range.
SELECT
    G.work_status,
    G.paid,
    MIN(G.from_date) AS from_date,
    G.to_date
FROM
(
    SELECT
        work_status,
        paid,
        from_date,
        MAX(to_date) AS to_date
    FROM CTE
    GROUP BY
        from_date,
        work_status,
        paid
) AS G
GROUP BY
    G.to_date,
    G.work_status,
    G.paid
ORDER BY
    MIN(G.from_date);

答案 3 :(得分:0)

这是组和窗口的典型示例。

首先需要确定重置点:在本例中,当某一行的 paid 与上一行的 paid 不同,或者上一行的 to_date 与当前行的 from_date 之间相差的天数大于 1 天时,该行就是一个重置点。

 work_status | paid | from_date  | to_date    | last_paid | dif_days | is_reset
 ----------: | :--- | :----------| :--------- | :-------- | -------: | -------:
         101 | 3    | 01/01/2009 | 10/01/2009 | 3         |        0 |     null
         101 | 3    | 11/01/2009 | 15/01/2009 | 3         |        1 |     null
         101 | 3    | 15/01/2009 | 31/03/2009 | 3         |        0 |     null
         101 | 3    | 01/04/2009 | 12/12/2009 | 3         |        1 |     null
         101 | 3    | 01/01/2010 | 16/01/2010 | 3         |       20 |        1
         101 | 3    | 16/01/2010 | 28/01/2015 | 3         |        0 |     null
         101 | 3    | 01/01/2016 | 15/03/2017 | 3         |      338 |        1
         101 | 4    | 01/04/2015 | 12/05/2015 | 3         |        0 |        1
         101 | 4    | 16/03/2017 | 17/03/2017 | 4         |      674 |        1
         101 | 5    | 29/01/2015 | 31/03/2015 | 4         |        0 |        1

在我们知道重置点之后,我们就可以生成组。

-- Flags the rows of @NRFC at which a new group must start (is_reset = 1):
-- either paid differs from the previous row's paid, or more than one day lies
-- between the previous to_date and this from_date. Rows that continue the
-- current group get is_reset = NULL.
SELECT
    prev.work_status,
    prev.paid,
    prev.from_date,
    prev.to_date,
    prev.last_paid,
    prev.dif_days,
    CASE
        WHEN prev.last_paid <> prev.paid OR prev.dif_days > 1 THEN 1
    END AS is_reset
FROM
(
    SELECT
        n.work_status,
        n.paid,
        n.from_date,
        n.to_date,
        -- paid of the previous row within the work_status (rows ordered by
        -- paid, then from_date); the row's own paid when there is no previous row
        COALESCE(
            LAG(n.paid) OVER (
                PARTITION BY n.work_status
                ORDER BY n.paid, n.from_date
            ),
            n.paid
        ) AS last_paid,
        -- days between the previous to_date of the same (work_status, paid)
        -- and this from_date; 0 for the first row of the partition
        COALESCE(
            DATEDIFF(
                day,
                LAG(n.to_date) OVER (
                    PARTITION BY n.work_status, n.paid
                    ORDER BY n.from_date
                ),
                n.from_date
            ),
            0
        ) AS dif_days
    FROM @NRFC AS n
) AS prev;
     work_status | paid | from_date           | to_date             | is_reset | grp
     ----------: | :--- | :------------------ | :------------------ | -------: | --:
             101 | 3    | 01/01/2009 00:00:00 | 10/01/2009 00:00:00 |     null |   0
             101 | 3    | 11/01/2009 00:00:00 | 15/01/2009 00:00:00 |     null |   0
             101 | 3    | 15/01/2009 00:00:00 | 31/03/2009 00:00:00 |     null |   0
             101 | 3    | 01/04/2009 00:00:00 | 12/12/2009 00:00:00 |     null |   0
             101 | 3    | 01/01/2010 00:00:00 | 16/01/2010 00:00:00 |        1 |   1
             101 | 3    | 16/01/2010 00:00:00 | 28/01/2015 00:00:00 |     null |   1
             101 | 3    | 01/01/2016 00:00:00 | 15/03/2017 00:00:00 |        1 |   2
             101 | 4    | 01/04/2015 00:00:00 | 12/05/2015 00:00:00 |        1 |   3
             101 | 4    | 16/03/2017 00:00:00 | 17/03/2017 00:00:00 |        1 |   4
             101 | 5    | 29/01/2015 00:00:00 | 31/03/2015 00:00:00 |        1 |   5

最后,按新生成的分组使用 MIN() 和 MAX() 进行聚合:

work_status | paid | from_date           | to_date            
----------: | :--- | :------------------ | :------------------
        101 | 3    | 01/01/2009 00:00:00 | 12/12/2009 00:00:00
        101 | 3    | 01/01/2010 00:00:00 | 28/01/2015 00:00:00
        101 | 3    | 01/01/2016 00:00:00 | 15/03/2017 00:00:00
        101 | 4    | 01/04/2015 00:00:00 | 12/05/2015 00:00:00
        101 | 4    | 16/03/2017 00:00:00 | 17/03/2017 00:00:00
        101 | 5    | 29/01/2015 00:00:00 | 31/03/2015 00:00:00

dbfiddle here