假设我有一个事件表,其中包含 personId、startDate 和 endDate 三列。
我想知道某个人(X)在事件上总共花了多少时间(事件之间可能相互重叠)。
如果此人只有1个事件,则很容易:datediff(dd, startDate, endDate)
如果这个人有2个事件就很棘手。
我将为预期结果设置一些方案。
情景1
startDate endDate
1 4
3 5
这意味着他的结果应该是从 1 到 5 的日期差,即 datediff(dd, 1, 5)。
情景2
startDate endDate
1 3
6 9
这意味着他的结果应该是 datediff(dd,1,3) 与 datediff(dd,6,9) 之和。
如何在sql查询中获得此结果?我只能想到一堆if语句,但是同一个人可以有n个事件,所以查询会非常混乱。
粉碎机编辑:我想添加第三种方案:
startDate endDate
1 5
4 8
11 15
粉碎机场景的理想结果:
(1,5)和(4,8)相互重叠,因此合并为(1,8);于是我们需要 datediff(1,8) + datediff(11,15)
=> 7 + 4 => 11
答案 0 :(得分:10)
您可以使用递归CTE来构建日期列表,然后计算不同的日期。
-- Sample events; each row is one event covering the days from startDate up to,
-- but not including, endDate.
declare @T table
(
  startDate date,
  endDate   date
);

insert into @T (startDate, endDate) values
('2011-01-01', '2011-01-05'),
('2011-01-04', '2011-01-08'),
('2011-01-11', '2011-01-15');

-- Expand every event into one row per covered day, then count the distinct
-- days so that days shared by overlapping events are counted only once.
with C as
(
  -- Anchor: the first day of each event. Zero-length events
  -- (startDate = endDate) are skipped so they contribute 0 days,
  -- matching datediff(dd, startDate, endDate).
  select startDate,
         endDate
  from @T
  where startDate < endDate
  union all
  -- Recursive step: advance one day at a time, stopping before endDate.
  select dateadd(day, 1, startDate),
         endDate
  from C
  where dateadd(day, 1, startDate) < endDate
)
select count(distinct startDate) as DayCount
from C
option (maxrecursion 0); -- 0 lifts the default recursion limit so long events do not fail
结果:
DayCount
-----------
11
或者你可以使用数字表。这里我使用master..spt_values:
-- Alternative that uses master..spt_values (rows with type = 'P') as a numbers
-- table: each number is treated as a day offset from the earliest start date.
-- NOTE(review): the range of numbers available in spt_values is not visible
-- here; confirm it covers the full date span of your data.
declare @MinStartDate date;

set @MinStartDate = (select min(startDate) from @T);

-- A day offset is counted once, however many events cover that day.
select count(distinct N.number)
from master..spt_values as N
  inner join @T as T
    on  N.type = 'P'
    and dateadd(day, N.Number, @MinStartDate)
          between T.startDate and dateadd(day, -1, T.endDate)
答案 1 :(得分:2)
以下SQL适用于您所描述的三种情况
-- Total time spent per person, merging each pair of consecutive overlapping
-- events into a single span before summing.
-- Known limitation: only adjacent pairs are merged, so a chain of three or
-- more overlapping events (e.g. [1,5], [4,8], [7,11]) is still over-counted.
WITH sampleData
     AS (SELECT 1 AS personid, 1 AS startDate, 4 AS endDate
         UNION ALL SELECT 1, 3, 5
         UNION ALL SELECT 2, 1, 3
         UNION ALL SELECT 2, 6, 9
         UNION ALL SELECT 3, 1, 5
         UNION ALL SELECT 3, 4, 8
         UNION ALL SELECT 3, 11, 15),
     -- Number all events so consecutive events of one person get adjacent rn values.
     cte
     AS (SELECT personid,
                startdate,
                enddate,
                Row_number() OVER(ORDER BY personid, startdate) AS rn
         FROM   sampledata),
     -- Every pair of consecutive events of the same person that overlap,
     -- collapsed into one span.
     overlaps
     AS (SELECT a.personid,
                a.startdate,
                -- Keep the later of the two ends, so an event fully contained
                -- in the previous one does not shrink the merged span.
                CASE
                  WHEN b.enddate > a.enddate THEN b.enddate
                  ELSE a.enddate
                END  AS enddate,
                a.rn AS id1,
                b.rn AS id2
         FROM   cte a
                INNER JOIN cte b
                  ON a.personid = b.personid
                     AND a.enddate > b.startdate
                     AND a.rn = b.rn - 1),
     -- Events that take part in no overlapping pair.
     nooverlaps
     AS (SELECT a.personid,
                a.startdate,
                a.enddate
         FROM   cte a
         WHERE  NOT EXISTS (SELECT 1
                            FROM   overlaps b
                            WHERE  a.rn = b.id1
                                    OR a.rn = b.id2))
SELECT personid,
       SUM(timespent) AS timespent
FROM   (SELECT personid,
               enddate - startdate AS timespent
        FROM   nooverlaps
        -- UNION ALL, not UNION: two spans of equal length for the same person
        -- are distinct contributions and must both be summed.
        UNION ALL
        SELECT personid,
               enddate - startdate
        FROM   overlaps) t
GROUP  BY personid
产生此结果
Personid timeSpent
----------- -----------
1 4
2 5
3 11
注意:我使用了简单的整数,但DateDiffs也应该起作用
正确性问题:如 Cheran S 所指出的,如果您的数据允许同一个人有多个事件连续重叠,此查询的结果将不正确,这种情况下应使用其他答案之一。他给出的例子是同一个 personId 下的 [1,5]、[4,8]、[7,11]。
答案 2 :(得分:2)
这是一个使用 Tally 表思路的解决方案(我最早是在 Itzik Ben-Gan 的一篇文章中看到的——每当遇到这类问题,我仍然会直接复制粘贴他的代码)。思路是:生成一个升序整数列表,按数字范围与源数据连接,然后统计不同数字的个数,如下所示。(此代码使用 SQL Server 2008 的语法,稍作修改即可在 SQL 2005 中使用。)
首先设置一些测试数据:
-- Scratch table holding one row per event for the example below.
CREATE TABLE #EventTable
(
    PersonId  INT      NOT NULL,
    startDate DATETIME NOT NULL,
    endDate   DATETIME NOT NULL
);

-- Person 1 has two overlapping events; person 2 has two separate events.
INSERT INTO #EventTable (PersonId, startDate, endDate)
VALUES
    (1, 'Jan 1, 2011', 'Jan 4, 2011'),
    (1, 'Jan 3, 2011', 'Jan 5, 2011'),
    (2, 'Jan 1, 2011', 'Jan 3, 2011'),
    (2, 'Jan 6, 2011', 'Jan 9, 2011');
确定一些初始值
-- Working variables for the tally-based query:
--   @Interval  upper bound on how many numbers the tally needs to produce
--   @FirstDay  date that tally number 1 maps to
--   @PersonId  person whose events are being measured
DECLARE
  @Interval bigint
 ,@FirstDay datetime
 ,@PersonId int = 1  -- (or whatever person you want to report on)
获取第一天和最大可能日期数(以防止cte产生额外值):
-- For the chosen person, capture the earliest start date and the number of
-- calendar days from that date through the latest end date (inclusive, hence + 1).
SELECT
    @FirstDay = MIN(et.startDate),
    @Interval = DATEDIFF(DAY, MIN(et.startDate), MAX(et.endDate)) + 1
FROM #EventTable AS et
WHERE et.PersonId = @PersonId
剪切并粘贴一个例程并修改并测试它只返回我们需要的整数:
/*
Stand-alone version of the number generator, kept commented out for reference.
Each PassN joins the previous pass with itself, squaring the row count;
Tally numbers the resulting rows starting at 1, and the final filter keeps
only the numbers up to @Interval.
;WITH
Pass0 as (select 1 as C union all select 1), --2 rows
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
select Number from Tally where Number <= @Interval
*/
现在通过首先加入每个源行中定义的区间来修改它,然后计算找到的每个不同的值:
-- Count the distinct days covered by the chosen person's events.
-- Pass0..Pass5 build a large row source by repeatedly cross-joining the
-- previous pass with itself; Tally numbers those rows 1, 2, 3, ...
-- Tally number N stands for the day @FirstDay + (N - 1).
;WITH
Pass0 as (select 1 as C union all select 1),                     -- 2 rows
Pass1 as (select 1 as C from Pass0 as A cross join Pass0 as B),  -- 4 rows
Pass2 as (select 1 as C from Pass1 as A cross join Pass1 as B),  -- 16 rows
Pass3 as (select 1 as C from Pass2 as A cross join Pass2 as B),  -- 256 rows
Pass4 as (select 1 as C from Pass3 as A cross join Pass3 as B),  -- 65536 rows
Pass5 as (select 1 as C from Pass4 as A cross join Pass4 as B),  -- 4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
SELECT et.PersonId, count(distinct Tally.Number) as EventDays
from #EventTable et
 inner join Tally
  -- Half-open range [startDate, endDate): the end day itself is not counted,
  -- so a single event yields datediff(dd, startDate, endDate) days.
  on  dateadd(dd, Tally.Number - 1, @FirstDay) >= et.startDate
  and dateadd(dd, Tally.Number - 1, @FirstDay) <  et.endDate
where et.PersonId = @PersonId
 and Tally.Number <= @Interval  -- caps how many tally rows need to be generated
group by et.PersonId
去掉 @PersonId 过滤条件,就可以得到所有人的结果。稍作修改,还可以按任意时间粒度统计,而不仅限于天(这就是我让 Tally 表能够生成非常大的数字的原因)。
答案 3 :(得分:1)
尝试这样的事情
-- Per-person sum of each event's day span.
-- Every event row is measured on its own, so a day covered by several
-- events is counted once per event.
SELECT
    e.personId,
    SUM(DATEDIFF(DAY, e.startDate, e.endDate)) AS TotalDuration
FROM yourEventTable AS e
GROUP BY e.personId
答案 4 :(得分:1)
代数。设 $B_n$ 为第 n 个事件的结束时间,$A_n$ 为第 n 个事件的开始时间,则差的总和等于总和的差:$\sum_n (B_n - A_n) = \sum_n B_n - \sum_n A_n$。所以你可以写
-- Pseudo-query fragment: "everything else" is presumably a placeholder for the
-- remaining select-list columns, and no FROM / GROUP BY clause is shown.
-- The expression is sum(endDate) - sum(startDate), i.e. the sum of each
-- event's own length; overlapping events are not merged.
select everything else, sum(cast(endDate as int)) - sum(cast(startDate as int)) as daysSpent
如果您的日期不含时间部分,此方法有效。否则,可以改为转换成 real 类型。
答案 5 :(得分:1)
-- Per-uid total of (b - a); the CTE exposes that total under the column name "gap".
;WITH cte (gap) AS
(
    SELECT SUM(x.b - x.a)
    FROM xxx AS x
    GROUP BY x.uid
)
SELECT c.gap
FROM cte AS c
答案 6 :(得分:-1)
编辑1 :我修改了两个解决方案以获得正确的结果。
编辑2:我对 Mikael Eriksson、Conrad Frix、Philip Kelley 以及我自己提出的解决方案进行了对比测试。所有测试都使用具有以下结构的 EventTable:
-- Event table used by every benchmarked solution: one row per event,
-- keyed by an identity column.
CREATE TABLE EventTable
(
    EventID   INT IDENTITY(1, 1) PRIMARY KEY,
    PersonId  INT      NOT NULL,
    StartDate DATETIME NOT NULL,
    EndDate   DATETIME NOT NULL,
    -- Rejects empty or reversed intervals.
    CONSTRAINT CK_StartDate_Before_EndDate CHECK (EndDate > StartDate)
);
此外,所有测试都在热缓冲区(未执行 DBCC DROPCLEANBUFFERS)和冷[计划]缓存(每次测试之前都执行了 DBCC FREEPROCCACHE)的条件下进行。由于某些解决方案使用了过滤条件(PersonId = 1)而其他解决方案没有,因此我只向 EventTable 中插入了一个人的行(INSERT ...(PersonId,...) VALUES (1,...))。
结果如下:
我的解决方案使用recursive CTEs。
解决方案1:
-- Solution 1: walk each person's events in start order with a recursive CTE,
-- tagging every event with the id of the merged interval it falls into, then
-- sum the day span of each merged interval per person.
WITH OrderedEvents AS
(
    -- Position of each event within its person, ordered by start, then end.
    SELECT
        ev.StartDate,
        ev.EndDate,
        ev.PersonId,
        ROW_NUMBER() OVER (PARTITION BY ev.PersonId ORDER BY ev.StartDate, ev.EndDate) AS Seq
    FROM EventTable AS ev
),
Walk AS
(
    -- Anchor: the first event of every person opens interval 1.
    SELECT
        head.PersonId,
        head.Seq,
        head.StartDate,
        head.EndDate,
        head.EndDate AS RunningEnd,
        1            AS IslandId
    FROM OrderedEvents AS head
    WHERE head.Seq = 1

    UNION ALL

    -- Step: move to the person's next event.
    SELECT
        nxt.PersonId,
        nxt.Seq,
        nxt.StartDate,
        nxt.EndDate,
        -- Latest end date seen so far for this person.
        CASE WHEN nxt.EndDate > prev.RunningEnd THEN nxt.EndDate ELSE prev.RunningEnd END,
        -- Same interval while the event starts no later than the running end;
        -- otherwise a new interval begins.
        prev.IslandId + CASE WHEN nxt.StartDate <= prev.RunningEnd THEN 0 ELSE 1 END
    FROM Walk AS prev
    INNER JOIN OrderedEvents AS nxt
        ON  nxt.PersonId = prev.PersonId
        AND nxt.Seq      = prev.Seq + 1
),
IslandDays AS
(
    -- Day span of each merged interval.
    SELECT
        w.PersonId,
        w.IslandId,
        DATEDIFF(DAY, MIN(w.StartDate), MAX(w.EndDate)) AS Days
    FROM Walk AS w
    GROUP BY w.PersonId, w.IslandId
)
SELECT
    d.PersonId,
    SUM(d.Days) AS DaysPerPerson
FROM IslandDays AS d
GROUP BY d.PersonId
OPTION (MAXRECURSION 32767);
解决方案2:
-- Materialise the numbered events once. The primary key (PersonId, RowNumber)
-- matches the columns the recursive query joins on.
-- The column is spelled PersonId, exactly as in EventTable and in the query
-- that reads @Base, so the script also works under a case-sensitive collation.
DECLARE @Base TABLE --or a temporary table: CREATE TABLE #Base (...)
(
    PersonId INT NOT NULL
    ,StartDate DATETIME NOT NULL
    ,EndDate DATETIME NOT NULL
    ,RowNumber INT NOT NULL
    ,PRIMARY KEY(PersonId, RowNumber)
);

-- RowNumber is the event's position within its person, ordered by start, then end.
INSERT @Base (PersonId, StartDate, EndDate, RowNumber)
SELECT e.PersonId
    ,e.StartDate
    ,e.EndDate
    ,ROW_NUMBER() OVER(PARTITION BY e.PersonId ORDER BY e.StartDate, e.EndDate) RowNumber
FROM EventTable e;
-- Solution 2: same interval-merging walk as Solution 1, but reading the
-- pre-numbered rows from @Base.
WITH Walk AS
(
    -- Anchor: the first event of every person opens interval 1.
    SELECT
        head.PersonId,
        head.RowNumber,
        head.StartDate,
        head.EndDate,
        head.EndDate AS RunningEnd,
        1            AS IslandId
    FROM @Base AS head
    WHERE head.RowNumber = 1

    UNION ALL

    -- Step: move to the person's next event.
    SELECT
        nxt.PersonId,
        nxt.RowNumber,
        nxt.StartDate,
        nxt.EndDate,
        -- Latest end date seen so far for this person.
        CASE WHEN nxt.EndDate > prev.RunningEnd THEN nxt.EndDate ELSE prev.RunningEnd END,
        -- Same interval while the event starts no later than the running end;
        -- otherwise a new interval begins.
        prev.IslandId + CASE WHEN nxt.StartDate <= prev.RunningEnd THEN 0 ELSE 1 END
    FROM Walk AS prev
    INNER JOIN @Base AS nxt
        ON  nxt.PersonId  = prev.PersonId
        AND nxt.RowNumber = prev.RowNumber + 1
),
IslandDays AS
(
    -- Day span of each merged interval.
    SELECT
        w.PersonId,
        w.IslandId,
        DATEDIFF(DAY, MIN(w.StartDate), MAX(w.EndDate)) AS Days
    FROM Walk AS w
    GROUP BY w.PersonId, w.IslandId
)
SELECT
    d.PersonId,
    SUM(d.Days) AS DaysPerPerson
FROM IslandDays AS d
GROUP BY d.PersonId
OPTION (MAXRECURSION 32767);