我想在按条件过滤的前提下,把连续的日期区间合并起来,但一直没能做对。
示例:
我的数据集:
我需要的结果(仅针对 paid = 3):
架构和数据脚本
-- Sample table: every row carries a work_status, a paid code and a
-- from_date / to_date pair.
CREATE TABLE [dbo].[NRFC]
(
    [work_status] [int]        NOT NULL,
    [paid]        [varchar](2) NOT NULL,
    [from_date]   [date]       NOT NULL,
    [to_date]     [date]       NOT NULL
);

-- Seed rows for the example (same ten rows, written as a single VALUES list).
INSERT INTO [dbo].[NRFC] ([work_status], [paid], [from_date], [to_date])
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000');
到目前为止我的代码:
-- Merge consecutive periods for paid = '3' into islands.
--
-- A period starts a new island when it begins more than one day after the
-- latest to_date of all earlier periods (or when there is no earlier period).
-- The earlier version only started a new island when the previous row had a
-- different paid value, so it merged periods separated by a real date gap
-- (e.g. 2009-12-12 -> 2010-01-01).
--
-- Output columns: from_date, to_date, paid (one row per island).
;WITH paid_periods AS (
    SELECT from_date
        ,to_date
        ,paid
        -- Latest end date among all earlier rows; NULL for the very first row.
        -- A running MAX (rather than just the previous row) keeps a period that
        -- is fully contained in an earlier one from opening a false gap.
        ,MAX(to_date) OVER (
            ORDER BY from_date, to_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS prev_to_date
    FROM NRFC
    -- paid is varchar(2): compare with a string literal to avoid an implicit
    -- varchar -> int conversion that fails on non-numeric codes.
    WHERE paid = '3'
)
,Grouped AS (
    SELECT from_date
        ,to_date
        ,paid
        -- Running count of island starts = island number.
        ,SUM(CASE
                WHEN prev_to_date IS NULL
                    OR DATEDIFF(day, prev_to_date, from_date) > 1
                    THEN 1
                ELSE 0
             END) OVER (
            ORDER BY from_date, to_date
            ROWS UNBOUNDED PRECEDING
        ) AS part
    FROM paid_periods
)
SELECT MIN(from_date) AS from_date
    ,MAX(to_date) AS to_date
    ,paid
FROM Grouped
GROUP BY part
    ,paid
ORDER BY MIN(from_date);
答案 0 :(得分:1)
这看起来与Itzik Ben-Gan在他的文章中详细讨论的Packing intervals问题完全相同。
主要思想是:每个区间的开始标记为 +1,每个区间的结束标记为 -1。如果这些标记的累计和(running total)大于零,说明我们正处于一段需要合并(打包)的连续区间之内。
阅读文章并逐步运行下面的查询,cte-by-cte并检查中间结果以了解其工作原理。
示例数据
-- Table variable holding the sample rows used by the query in this answer.
DECLARE @NRFC TABLE
(
    [work_status] [int]        NOT NULL,
    [paid]        [varchar](2) NOT NULL,
    [from_date]   [date]       NOT NULL,
    [to_date]     [date]       NOT NULL
);

-- Same ten sample rows, written as a single VALUES list.
INSERT INTO @NRFC ([work_status], [paid], [from_date], [to_date])
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000')
;
**查询**
该查询的算法把区间的结束端视为不包含(半开区间),而数据中的 to_date 是包含在区间内的,所以我先给 to_date 加上 1 天,最后再把它减回去。
WITH
IntervalEvents
AS
(
    -- Every interval contributes two events:
    --   a start event (+1) at from_date           ("from" is inclusive)
    --   an end event  (-1) at to_date + 1 day     ("to" is treated as exclusive)
    -- start_ord / end_ord number the start / end events separately
    -- within each (work_status, paid) partition.
    SELECT
        work_status
        ,paid
        ,from_date AS ts
        ,+1 AS event_type
        ,NULL AS end_ord
        ,ROW_NUMBER() OVER (PARTITION BY work_status, paid ORDER BY from_date) AS start_ord
    FROM @NRFC

    UNION ALL

    SELECT
        work_status
        ,paid
        ,DATEADD(day, 1, to_date) AS ts
        ,-1 AS event_type
        ,ROW_NUMBER() OVER (PARTITION BY work_status, paid ORDER BY to_date) AS end_ord
        ,NULL AS start_ord
    FROM @NRFC
)
,OrderedEvents
AS
(
    -- event_ord = how many events (start or end) have happened so far.
    -- At the same timestamp starts sort before ends (event_type DESC), so
    -- touching intervals are seen as one continuous run.
    SELECT
        ev.work_status
        ,ev.paid
        ,ev.ts
        ,ev.event_type
        ,ev.end_ord
        ,ev.start_ord
        ,ROW_NUMBER() OVER (PARTITION BY ev.work_status, ev.paid
                            ORDER BY ev.ts, ev.event_type DESC) AS event_ord
    FROM IntervalEvents AS ev
)
,PackBoundaries
AS
(
    -- Keep only the events that open or close a packed group:
    --   start event: start_ord - (event_ord - start_ord) - 1 is the number of
    --                intervals active just before it;
    --   end event:   (event_ord - end_ord) - end_ord is the number of
    --                intervals still active right after it.
    -- Either count is 0 exactly at a group boundary. The surviving events are
    -- then paired up (1st+2nd, 3rd+4th, ...) through grpnum.
    SELECT
        oe.work_status
        ,oe.paid
        ,oe.ts
        ,(ROW_NUMBER() OVER (PARTITION BY oe.work_status, oe.paid ORDER BY oe.ts) - 1)
            / 2 + 1 AS grpnum
    FROM OrderedEvents AS oe
    WHERE
        CASE
            WHEN oe.start_ord IS NOT NULL
                THEN oe.start_ord - (oe.event_ord - oe.start_ord) - 1
            ELSE (oe.event_ord - oe.end_ord) - oe.end_ord
        END = 0
)
-- One row per packed group; the extra day added to to_date is removed again.
SELECT
    pb.work_status
    ,pb.paid
    ,MIN(pb.ts) AS starttime
    ,DATEADD(day, -1, MAX(pb.ts)) AS endtime
FROM PackBoundaries AS pb
GROUP BY
    pb.work_status
    ,pb.paid
    ,pb.grpnum
ORDER BY
    pb.work_status
    ,pb.paid
    ,starttime
;
**结果**
+-------------+------+------------+------------+
| work_status | paid | starttime | endtime |
+-------------+------+------------+------------+
| 101 | 3 | 2009-01-01 | 2009-12-12 |
| 101 | 3 | 2010-01-01 | 2015-01-28 |
| 101 | 3 | 2016-01-01 | 2017-03-15 |
| 101 | 4 | 2015-04-01 | 2015-05-12 |
| 101 | 4 | 2017-03-16 | 2017-03-17 |
| 101 | 5 | 2015-01-29 | 2015-03-31 |
+-------------+------+------------+------------+
答案 1 :(得分:1)
另一种解法:
-- Temp table with the same sample rows, used by the query that follows.
CREATE TABLE #tb
(
    [work_status] [int]        NOT NULL,
    [paid]        [varchar](2) NOT NULL,
    [from_date]   [date]       NOT NULL,
    [to_date]     [date]       NOT NULL
);

INSERT INTO #tb ([work_status], [paid], [from_date], [to_date])
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000');
-- Merge consecutive/overlapping periods per (work_status, paid) by expanding
-- every period into its individual days and grouping runs of consecutive days.
--
-- The day offsets come from an inline tally (10^7 rows available, enough for
-- the whole range of the date type) instead of master.dbo.spt_values, whose
-- type 'P' rows only cover 0..2047: any period longer than 2047 days was
-- silently truncated, and the table itself is undocumented.
;WITH digits AS (
    SELECT v.d
    FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS v(d)
)
SELECT
    tt.work_status,
    tt.paid,
    MIN(tt.dt) AS from_date,
    MAX(tt.dt) AS to_date
FROM (
    -- DISTINCT collapses days covered by more than one period; DENSE_RANK
    -- gives such duplicate days the same rank, so the de-duplicated rows
    -- keep a gap-free ranking.
    SELECT DISTINCT
        t.work_status,
        t.paid,
        DATEADD(day, n.number, t.from_date) AS dt,
        DENSE_RANK() OVER (
            PARTITION BY t.work_status, t.paid
            ORDER BY DATEADD(day, n.number, t.from_date)
        ) AS rn,
        MIN(DATEADD(day, n.number, t.from_date)) OVER (
            PARTITION BY t.work_status, t.paid
        ) AS first_dt
    FROM #tb AS t
    CROSS APPLY (
        -- Offsets 0 .. DATEDIFF(day, from_date, to_date) for this row.
        -- A row with to_date < from_date yields no days (TOP (0)), matching
        -- the empty BETWEEN range of the spt_values version.
        -- ORDER BY makes TOP return exactly the first N row numbers.
        -- ROW_NUMBER() is bigint; DATEADD requires an int offset, hence CAST.
        SELECT TOP (CASE
                        WHEN t.to_date < t.from_date THEN 0
                        ELSE DATEDIFF(day, t.from_date, t.to_date) + 1
                    END)
            CAST(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS int) AS number
        FROM digits AS d1
        CROSS JOIN digits AS d2
        CROSS JOIN digits AS d3
        CROSS JOIN digits AS d4
        CROSS JOIN digits AS d5
        CROSS JOIN digits AS d6
        CROSS JOIN digits AS d7
        ORDER BY number
    ) AS n
) AS tt
-- Within a run of consecutive days, rank and day offset grow in lockstep,
-- so their difference is constant per island.
GROUP BY tt.work_status, tt.paid, tt.rn - DATEDIFF(day, tt.first_dt, tt.dt)
ORDER BY tt.work_status, tt.paid, MIN(tt.dt);
work_status paid from_date to_date ----------- ---- ---------- ---------- 101 3 2009-01-01 2009-12-12 101 3 2010-01-01 2015-01-28 101 3 2016-01-01 2017-03-15 101 4 2015-04-01 2015-05-12 101 4 2017-03-16 2017-03-17 101 5 2015-01-29 2015-03-31
答案 2 :(得分:0)
使用递归 CTE 可以得到所需的输出。下面的代码只考虑了 paid = 3 的情况,请检查。
-- Merge chains of adjacent periods for paid = '3' with a recursive CTE.
-- Two periods are adjacent when the next one starts on the same day as, or
-- the day after, the previous one's to_date.
--
-- Fixes over the earlier version:
--   * the table is NRFC (it was misspelled NFRC, so the query could not run);
--   * paid is varchar(2), so it is compared with '3' rather than the int 3;
--   * periods are only chained within the same work_status;
--   * aliases use one consistent case, so case-sensitive collations work;
--   * the recursive step must move to_date forward, which guarantees
--     termination and makes lifting the default 100-level limit safe.
;with CTE as
(
    -- Anchor: each paid = '3' period, extended to its direct successor if any.
    select A.work_status, A.paid, A.from_date, isnull(B.to_date, A.to_date) as to_date
    from NRFC as A
    left outer join NRFC as B
        on  A.work_status = B.work_status
        and A.paid = B.paid
        and (A.to_date = B.from_date or datediff(D, A.to_date, B.from_date) = 1)
    where A.paid = '3'

    union all

    -- Recursive step: keep extending while another adjacent period exists.
    select A.work_status, A.paid, A.from_date, B.to_date
    from CTE as A
    inner join NRFC as B
        on  A.work_status = B.work_status
        and A.paid = B.paid
        and (A.to_date = B.from_date or datediff(D, A.to_date, B.from_date) = 1)
        and B.to_date > A.to_date   -- always make progress (no self-loops)
)
-- Inner query: furthest end reachable from each start.
-- Outer query: earliest start that reaches each end = one row per island.
select work_status, paid, min(from_date) as from_date, to_date
from (
    select work_status, paid, from_date, max(to_date) as to_date
    from CTE
    group by from_date, work_status, paid
) as G
group by to_date, work_status, paid
order by from_date
option (maxrecursion 0);
答案 3 :(得分:0)
这是组和窗口的典型示例。
首先你需要设置一个重置点。在这种情况下,重置点是指:当前行的 paid 与上一行的 paid 不同,或者上一行的 to_date 与当前行的 from_date 之间相差的天数大于 1 天。
work_status | paid | from_date | to_date | last_paid | dif_days | is_reset ----------: | :--- | :----------| :--------- | :-------- | -------: | -------: 101 | 3 | 01/01/2009 | 10/01/2009 | 3 | 0 | null 101 | 3 | 11/01/2009 | 15/01/2009 | 3 | 1 | null 101 | 3 | 15/01/2009 | 31/03/2009 | 3 | 0 | null 101 | 3 | 01/04/2009 | 12/12/2009 | 3 | 1 | null 101 | 3 | 01/01/2010 | 16/01/2010 | 3 | 20 | 1 101 | 3 | 16/01/2010 | 28/01/2015 | 3 | 0 | null 101 | 3 | 01/01/2016 | 15/03/2017 | 3 | 338 | 1 101 | 4 | 01/04/2015 | 12/05/2015 | 3 | 0 | 1 101 | 4 | 16/03/2017 | 17/03/2017 | 4 | 674 | 1 101 | 5 | 29/01/2015 | 31/03/2015 | 4 | 0 | 1
在我们知道重置点之后,我们就可以生成组。
-- Flag "reset points": rows where a new group must start.
--   last_paid : paid of the previous row (ordered by paid, from_date within
--               work_status); falls back to the row's own paid on the first row
--   dif_days  : days between the previous to_date (same work_status and paid)
--               and this from_date; 0 when there is no previous row
--   is_reset  : 1 when paid changed or the gap is larger than one day,
--               otherwise NULL
select
    x.work_status,
    x.paid,
    x.from_date,
    x.to_date,
    x.last_paid,
    x.dif_days,
    case
        when x.last_paid <> x.paid or x.dif_days > 1 then 1
    end as is_reset
from (
    -- Compute both look-back values once so the CASE above can reuse them.
    select
        n.work_status,
        n.paid,
        n.from_date,
        n.to_date,
        coalesce(
            lag(n.paid) over (partition by n.work_status
                              order by n.paid, n.from_date),
            n.paid
        ) as last_paid,
        coalesce(
            datediff(day,
                     lag(n.to_date) over (partition by n.work_status, n.paid
                                          order by n.from_date),
                     n.from_date),
            0
        ) as dif_days
    from @NRFC as n
) as x;
work_status | paid | from_date | to_date | is_reset | grp ----------: | :--- | :------------------ | :------------------ | -------: | --: 101 | 3 | 01/01/2009 00:00:00 | 10/01/2009 00:00:00 | null | 0 101 | 3 | 11/01/2009 00:00:00 | 15/01/2009 00:00:00 | null | 0 101 | 3 | 15/01/2009 00:00:00 | 31/03/2009 00:00:00 | null | 0 101 | 3 | 01/04/2009 00:00:00 | 12/12/2009 00:00:00 | null | 0 101 | 3 | 01/01/2010 00:00:00 | 16/01/2010 00:00:00 | 1 | 1 101 | 3 | 16/01/2010 00:00:00 | 28/01/2015 00:00:00 | null | 1 101 | 3 | 01/01/2016 00:00:00 | 15/03/2017 00:00:00 | 1 | 2 101 | 4 | 01/04/2015 00:00:00 | 12/05/2015 00:00:00 | 1 | 3 101 | 4 | 16/03/2017 00:00:00 | 17/03/2017 00:00:00 | 1 | 4 101 | 5 | 29/01/2015 00:00:00 | 31/03/2015 00:00:00 | 1 | 5
最后,按新生成的分组使用 MIN() 和 MAX():
work_status | paid | from_date | to_date ----------: | :--- | :------------------ | :------------------ 101 | 3 | 01/01/2009 00:00:00 | 12/12/2009 00:00:00 101 | 3 | 01/01/2010 00:00:00 | 28/01/2015 00:00:00 101 | 3 | 01/01/2016 00:00:00 | 15/03/2017 00:00:00 101 | 4 | 01/04/2015 00:00:00 | 12/05/2015 00:00:00 101 | 4 | 16/03/2017 00:00:00 | 17/03/2017 00:00:00 101 | 5 | 29/01/2015 00:00:00 | 31/03/2015 00:00:00
dbfiddle here