使用条件对连续日期进行分组

时间:2018-02-14 05:12:53

标签: sql sql-server tsql

SQL Server

数据:

 Row | Type | Start Date | End Date   | Dollars
------------------------------------------
1    |  1   | 01/01/2017 | 01/05/2017 |  10
2    |  1   | 01/08/2017 | 01/12/2017 |  10
3    |  1   | 01/17/2017 | 01/19/2017 |  10
4    |  1   | 01/28/2017 | 02/10/2017 |  10
5    |  1   | 02/20/2017 | 03/10/2017 |  10
6    |  2   | 10/01/2017 | 10/03/2017 |  10
7    |  2   | 10/20/2017 | 10/23/2017 |  10
8    |  2   | 10/25/2017 | 10/29/2017 |  10

在每个类型(Type)内,我需要把相隔不到 7 天的连续日期时段归为同一组,然后对每个组的 Dollars 求和,并对组内各时段之间的间隔天数(GapDays)求和。

中间表:

 Row | Type | Start Date | End Date   | Dollars | Grouping | GapDays
------------------------------------------
1    |  1   | 01/01/2017 | 01/05/2017 |  10     |  1       |  null
2    |  1   | 01/08/2017 | 01/12/2017 |  10     |  1       |  3
3    |  1   | 01/17/2017 | 01/19/2017 |  10     |  1       |  5
4    |  1   | 01/28/2017 | 02/10/2017 |  10     |  2       |  9
5    |  1   | 02/20/2017 | 03/10/2017 |  10     |  3       |  10
6    |  2   | 10/01/2017 | 10/03/2017 |  10     |  1       |  null
7    |  2   | 10/20/2017 | 10/23/2017 |  10     |  2       |  17
8    |  2   | 10/25/2017 | 10/29/2017 |  10     |  2       |  2

结果:

----------------------------------------------------
 Type | Start Date |  End Date  | Dollars | GapDays
----------------------------------------------------
  1   | 01/01/2017 | 01/19/2017 |   30    |   8
  1   | 01/28/2017 | 02/10/2017 |   10    |   0
  1   | 02/20/2017 | 03/10/2017 |   10    |   0
  2   | 10/01/2017 | 10/03/2017 |   10    |   0
  2   | 10/20/2017 | 10/29/2017 |   20    |   2

解决方案:先用一个二元标志标记各时段是否"连续"(本例中为间隔 < 7 天),再用 ROWS UNBOUNDED PRECEDING 对该标志做累计求和,从而得到分组编号。

-- Gaps-and-islands: within each [Type], periods whose gap to the previous
-- period is under 7 days are merged into one group; Dollars and the gaps
-- inside each group are then totalled.
with cte as (
-- GapDays = days between the previous period's EndDate and this StartDate.
-- The first period of a type has no predecessor (LAG yields NULL), so its
-- gap is reported as 0.
select
  [Type],
  StartDate,
  EndDate,
  Dollars,
  COALESCE(DATEDIFF(dd, LAG(EndDate, 1, NULL) OVER (PARTITION BY [Type] ORDER BY StartDate, EndDate), StartDate), 0) AS GapDays
from
  #data
),
cte2 as (
-- group1 = 1 marks a row that opens a new group (gap of 7 days or more).
select
  [Type],
  StartDate,
  EndDate,
  Dollars,
  GapDays,
  case when GapDays < 7 then 0 else 1 end as group1
from
  cte
),
cte3 as (
-- Running total of the "opens a new group" flag = group number within the
-- type. Same ordering as the LAG above so both windows see the same sequence.
select
  [Type],
  StartDate,
  EndDate,
  Dollars,
  GapDays,
  group1,
  sum(group1) over (partition by [Type] order by StartDate, EndDate rows unbounded preceding) as group2
from
  cte2
)
select
  [Type] AS [TYPE],
  MIN(StartDate) AS StartDate,
  MAX(EndDate) AS EndDate,
  SUM(Dollars) AS Dollars,
  -- The gap in front of a group-opening row separates two groups, so it is
  -- not counted. Reusing group1 keeps this in lock-step with the grouping
  -- rule; the earlier "GapDays > 7" test counted a gap of exactly 7 days
  -- even though that gap had started a new group.
  SUM(CASE WHEN group1 = 1 THEN 0 ELSE GapDays END) AS GapDays
from
  cte3
group by
  [Type], group2
order by
  [Type], MIN(StartDate)

4 个答案:

答案 0 :(得分:0)

您需要先在每个类型内根据日期差计算出分组,然后对各列应用聚合函数,如下所示:

可以在这里查看 DENSE_RANK 的说明(返回结果集分区内各行的排名,排名之间没有间断;某一行的排名等于排在该行之前的不同排名的数量加一)。

-- Groups consecutive periods of the same Type that lie less than 7 days apart
-- and returns one row per group with its date span, total Dollars and the sum
-- of the gaps inside the group.
;WITH Gaps AS
(
    -- PrevGap = days between the previous period's EndDate and this StartDate
    -- within the same Type; NULL for the first period of a Type.
    SELECT
        Type,
        StartDate,
        EndDate,
        Dollars,
        DATEDIFF(d, LAG(EndDate) OVER (PARTITION BY Type ORDER BY StartDate, EndDate), StartDate) AS PrevGap
    FROM @tblTest
),
Islands AS
(
    -- A row opens a new island when it has no predecessor or the gap is 7 days
    -- or more (PrevGap < 7 is not true); the running count of such rows is the
    -- island number inside the Type. Only gaps inside an island are kept.
    SELECT
        Type,
        StartDate,
        EndDate,
        Dollars,
        CASE WHEN PrevGap < 7 THEN PrevGap ELSE 0 END AS GapDays,
        SUM(CASE WHEN PrevGap < 7 THEN 0 ELSE 1 END)
            OVER (PARTITION BY Type ORDER BY StartDate, EndDate ROWS UNBOUNDED PRECEDING) AS IslandNo
    FROM Gaps
),
T AS
(
    -- PartNo numbers the (Type, IslandNo) pairs consecutively across all types.
    SELECT
        Type,
        StartDate,
        EndDate,
        Dollars,
        GapDays,
        DENSE_RANK() OVER (ORDER BY Type, IslandNo) AS PartNo
    FROM Islands
)

SELECT
    Type,
    MIN(StartDate) AS StartDate,
    MAX(EndDate) AS EndDate,
    ISNULL(SUM(Dollars),0) AS Dollars,
    SUM(GapDays) AS GapDays
FROM T
GROUP BY Type,PartNo
ORDER BY Type,MIN(StartDate)

输出:

enter image description here

注意:您的问题似乎提供了错误的结果

答案 1 :(得分:0)

您可以使用如下查询(参见可运行的演示):

-- Sample data from the question (all 8 rows, including the 02/20-03/10 period
-- of type 1). Dates are written as YYYY-MM-DD so that loading them into the
-- date columns does not depend on the session's DATEFORMAT / language.
create table data (Type int, StartDate date, EndDate date, Dollars int);
insert into data (Type, StartDate, EndDate, Dollars) values
 (1,'2017-01-01','2017-01-05',10)
,(1,'2017-01-08','2017-01-12',10)
,(1,'2017-01-17','2017-01-19',10)
,(1,'2017-01-28','2017-02-10',10)
,(1,'2017-02-20','2017-03-10',10)
,(2,'2017-10-01','2017-10-03',10)
,(2,'2017-10-20','2017-10-23',10)
,(2,'2017-10-25','2017-10-29',10);

-- Merges periods of the same Type that are less than 7 days apart and returns
-- one row per merged group: date span, total Dollars and the gap days inside
-- the group.
; with uniquerowset as
(
    -- days        : inclusive length of the period (informational only).
    -- days_missed : days between the previous period's EndDate and this
    --               StartDate within the Type; NULL for the first period.
    select
        Type,
        StartDate,
        EndDate,
        Dollars,
        days = datediff(d, StartDate, EndDate) + 1,
        days_missed =
            datediff(d, lag(EndDate) over (partition by Type order by StartDate asc, EndDate asc), StartDate)
    from data
 ),
 finalresult as
 (
    -- grouped  : 1 when the row continues the previous row's group (gap < 7).
    -- group_no : running count of rows that do NOT continue a group, i.e. the
    --            number of the group the row belongs to within its Type.
    --            (A difference of two row_numbers is not a valid group key
    --            here, because ordering by the 0/1 flag alone does not keep
    --            rows in date order.)
    -- days_missed_corrected : the gap, counted only when it lies inside a group.
    select
        Type,
        StartDate,
        EndDate,
        Dollars,
        days,
        days_missed,
        grouped = case when days_missed < 7 then 1 else 0 end,
        group_no = sum(case when days_missed < 7 then 0 else 1 end)
                   over (partition by Type order by StartDate asc, EndDate asc rows unbounded preceding),
        days_missed_corrected = case when days_missed < 7 then days_missed else 0 end
    from
        uniquerowset
 )

 select
     Type,
     StartDate = Min(StartDate),
     EndDate = Max(EndDate),
     Dollars = Sum(Dollars),
     GapDays = Sum(days_missed_corrected)
 from finalresult
 group by Type, group_no
 order by min(StartDate), Type

答案 2 :(得分:0)

如果所有样本数据的输出都正确,之后可以再讨论并实施优化。

试试这个脚本:

-- Sample data from the question (all 8 rows, including the 02/20-03/10 period
-- of type 1). Dates are written as YYYY-MM-DD so that loading them into the
-- date columns does not depend on the session's DATEFORMAT / language.
create table #data (Type int, StartDate date, EndDate date, Dollars int);
insert into #data (Type, StartDate, EndDate, Dollars) values
 (1,'2017-01-01','2017-01-05',10)
,(1,'2017-01-08','2017-01-12',10)
,(1,'2017-01-17','2017-01-19',10)
,(1,'2017-01-28','2017-02-10',10)
,(1,'2017-02-20','2017-03-10',10)
,(2,'2017-10-01','2017-10-03',10)
,(2,'2017-10-20','2017-10-23',10)
,(2,'2017-10-25','2017-10-29',10);

-- Walks the periods of each Type in date order with a recursive CTE, carrying
-- a group number (grp) forward and bumping it whenever the gap to the previous
-- period is 7 days or more; then aggregates per (Type, grp).
;with CTE as
(
    -- rn: position of the period within its Type, in date order.
    select [Type], StartDate, EndDate, Dollars
        , ROW_NUMBER() over (partition by [Type] order by StartDate, EndDate) as rn
    from #data
)
, CTE1 as
(
    -- Anchor: the first period of every Type opens group 1 with no gap.
    select [Type], StartDate, EndDate, Dollars, rn
        , 1 as grp
        , 0 as gapDays
    from CTE
    where rn = 1

    union all

    -- Step: attach period rn+1 to its predecessor. It stays in the same group
    -- when the gap is under 7 days (and contributes that gap); otherwise it
    -- opens the next group and contributes no gap days.
    select c.[Type], c.StartDate, c.EndDate, c.Dollars
        , c.rn
        , case when DATEDIFF(day, c1.EndDate, c.StartDate) < 7
               then c1.grp else c1.grp + 1 end
        , case when DATEDIFF(day, c1.EndDate, c.StartDate) < 7
               then DATEDIFF(day, c1.EndDate, c.StartDate)
               else 0 end
    from CTE c
    inner join CTE1 c1
        on c.[Type] = c1.[Type]
       and c.rn = c1.rn + 1
)
-- One pass over CTE1 yields the span and the totals of each group; a separate
-- lookup of min/max would evaluate the recursive CTE a second time.
select [Type] as [type]
    , min(StartDate) as startDate
    , max(EndDate) as EndDate
    , grp
    , sum(Dollars) as Dollars
    , sum(gapDays) as gapDays
from CTE1
group by [Type], grp
-- The recursion depth equals the number of periods in the largest Type, so the
-- default limit of 100 levels is lifted. rn strictly increases on every step,
-- which guarantees termination.
option (maxrecursion 0);

drop table #data;

答案 3 :(得分:0)

解决方案:先用一个二元标志标记各时段是否"连续"(本例中为间隔 < 7 天),再用 ROWS UNBOUNDED PRECEDING 对该标志做累计求和,从而得到分组编号。

-- Gaps-and-islands: within each [Type], periods whose gap to the previous
-- period is under 7 days are merged into one group; Dollars and the gaps
-- inside each group are then totalled.
with cte as (
-- GapDays = days between the previous period's EndDate and this StartDate.
-- The first period of a type has no predecessor (LAG yields NULL), so its
-- gap is reported as 0.
select
  [Type],
  StartDate,
  EndDate,
  Dollars,
  COALESCE(DATEDIFF(dd, LAG(EndDate, 1, NULL) OVER (PARTITION BY [Type] ORDER BY StartDate, EndDate), StartDate), 0) AS GapDays
from
  #data
),
cte2 as (
-- group1 = 1 marks a row that opens a new group (gap of 7 days or more).
select
  [Type],
  StartDate,
  EndDate,
  Dollars,
  GapDays,
  case when GapDays < 7 then 0 else 1 end as group1
from
  cte
),
cte3 as (
-- Running total of the "opens a new group" flag = group number within the
-- type. Same ordering as the LAG above so both windows see the same sequence.
select
  [Type],
  StartDate,
  EndDate,
  Dollars,
  GapDays,
  group1,
  sum(group1) over (partition by [Type] order by StartDate, EndDate rows unbounded preceding) as group2
from
  cte2
)
select
  [Type] AS [TYPE],
  MIN(StartDate) AS StartDate,
  MAX(EndDate) AS EndDate,
  SUM(Dollars) AS Dollars,
  -- The gap in front of a group-opening row separates two groups, so it is
  -- not counted. Reusing group1 keeps this in lock-step with the grouping
  -- rule; the earlier "GapDays > 7" test counted a gap of exactly 7 days
  -- even though that gap had started a new group.
  SUM(CASE WHEN group1 = 1 THEN 0 ELSE GapDays END) AS GapDays
from
  cte3
group by
  [Type], group2
order by
  [Type], MIN(StartDate)