假设我有一个包含大量日期的表格,例如:
-- Table variable holding one row per event: idx identifies the event,
-- startdate/enddate delimit the period during which the event is running.
-- T-SQL requires the column list to be wrapped in parentheses, not braces.
declare @tbl table (
    idx int primary key,
    startdate datetime,
    enddate datetime
)
我想找到startdate和enddate相交的最大行集(在现实世界中,开始日期和结束日期代表事件的开始和结束时间,我需要找到同时发生的最大事件数量)。
在另一种编程语言中,我可以通过startdate对所有条目进行排序,然后遍历每个条目一次,构建一组临时交集(跟踪生成的最大集合)。但我不确定这是否是在T-SQL中表达这一点的最有效方式。帮助!
哦,这是SQL Server 2000. :(
答案 0 :(得分:3)
已更新,查询中不再使用 union all
-- Fixture: six sample sessions; idx is generated by the identity column.
DECLARE @tbl TABLE (
    idx INT IDENTITY(1,1) PRIMARY KEY,
    startdate DATETIME,
    enddate DATETIME
);

INSERT INTO @tbl (startdate, enddate)
SELECT '2009-01-01', '2009-01-05' UNION ALL
SELECT '2009-01-02', '2009-01-04' UNION ALL
SELECT '2009-01-01', '2009-01-03' UNION ALL
SELECT '2009-01-03', '2009-01-06' UNION ALL
SELECT '2009-01-04', '2009-01-07' UNION ALL
SELECT '2009-01-05', '2009-01-08';

-- For every session, report how many sessions are running at the instant it
-- starts: +1 for each session started at or before that instant, -1 for each
-- session that had already ended at or before it.
SELECT
    all_events.idx,
    all_events.startdate,
    (
        SELECT SUM(delta.in_or_out)
        FROM (
            -- The per-row delta is computed in a derived table so that SUM()
            -- aggregates a plain column rather than an expression mixing the
            -- outer reference with local columns.
            SELECT
                CASE WHEN other.startdate <= all_events.startdate THEN 1 ELSE 0 END
                - CASE WHEN other.enddate <= all_events.startdate THEN 1 ELSE 0 END
                    AS in_or_out
            FROM @tbl AS other
            WHERE other.startdate <= all_events.startdate
               OR other.enddate <= all_events.startdate
        ) AS delta
    ) AS concurent
FROM @tbl AS all_events
ORDER BY all_events.startdate
这会列出各会话的开始时间,以及每个新会话开始时的并发(concurrent)会话数:
idx startdate concurent
3 2009-01-01 00:00:00.000 2
1 2009-01-01 00:00:00.000 2
2 2009-01-02 00:00:00.000 3
4 2009-01-03 00:00:00.000 3
5 2009-01-04 00:00:00.000 3
6 2009-01-05 00:00:00.000 3
要满足原始需求(找出并发数最大的那组会话),需要把这个查询运行两次:一次求出最大并发数,一次找出达到该最大并发数的会话开始时间;然后再取出在这些时间点上正在进行的会话。
<strong>更新</strong>
好的,下面用一个查询直接检索并发数最大的会话。我修改了测试数据,以消除结束时间与开始时间恰好重合所带来的歧义(ambiguous)重叠:
-- Fixture: six sample sessions whose end times fall one second before
-- midnight, so no session ends exactly when another one starts.
DECLARE @tbl TABLE (
    idx INT IDENTITY(1,1) PRIMARY KEY,
    startdate DATETIME,
    enddate DATETIME
);

INSERT INTO @tbl (startdate, enddate)
SELECT '2009-01-01', '2009-01-04 23:59:59' UNION ALL
SELECT '2009-01-02', '2009-01-03 23:59:59' UNION ALL
SELECT '2009-01-01', '2009-01-02 23:59:59' UNION ALL
SELECT '2009-01-03', '2009-01-03 23:59:59' UNION ALL
SELECT '2009-01-04', '2009-01-04 23:59:59' UNION ALL
SELECT '2009-01-05', '2009-01-05 23:59:59';

-- Lists, for every start instant at which the concurrency reaches its maximum,
-- the sessions that are running at that instant.
SELECT
    max_concurent_starts.startdate AS concurentdate,
    overlapping.idx,
    overlapping.startdate,
    overlapping.enddate
FROM (
    -- Concurrency at each session start: +1 per session started at or before
    -- that instant, -1 per session already ended at or before it.
    SELECT
        all_events.startdate,
        (
            SELECT SUM(delta.in_or_out)
            FROM (
                SELECT
                    CASE WHEN other.startdate <= all_events.startdate THEN 1 ELSE 0 END
                    - CASE WHEN other.enddate <= all_events.startdate THEN 1 ELSE 0 END
                        AS in_or_out
                FROM @tbl AS other
                WHERE other.startdate <= all_events.startdate
                   OR other.enddate <= all_events.startdate
            ) AS delta
        ) AS concurent
    FROM @tbl AS all_events
) AS max_concurent_starts
INNER JOIN @tbl AS overlapping
    ON overlapping.startdate <= max_concurent_starts.startdate
    AND overlapping.enddate >= max_concurent_starts.startdate
WHERE max_concurent_starts.concurent = (
    -- Highest concurrency over all start instants (same computation as above).
    SELECT TOP 1 all_events_with_concurent.concurent
    FROM (
        SELECT
            (
                SELECT SUM(delta.in_or_out)
                FROM (
                    SELECT
                        CASE WHEN other.startdate <= all_events.startdate THEN 1 ELSE 0 END
                        - CASE WHEN other.enddate <= all_events.startdate THEN 1 ELSE 0 END
                            AS in_or_out
                    FROM @tbl AS other
                    WHERE other.startdate <= all_events.startdate
                       OR other.enddate <= all_events.startdate
                ) AS delta
            ) AS concurent
        FROM @tbl AS all_events
    ) AS all_events_with_concurent
    ORDER BY all_events_with_concurent.concurent DESC
)
ORDER BY concurentdate, overlapping.startdate;
这给出了如下结果:
concurentdate idx startdate enddate
2009-01-02 00:00:00.000 3 2009-01-01 00:00:00.000 2009-01-02 23:59:59.000
2009-01-02 00:00:00.000 1 2009-01-01 00:00:00.000 2009-01-04 23:59:59.000
2009-01-02 00:00:00.000 2 2009-01-02 00:00:00.000 2009-01-03 23:59:59.000
2009-01-03 00:00:00.000 1 2009-01-01 00:00:00.000 2009-01-04 23:59:59.000
2009-01-03 00:00:00.000 2 2009-01-02 00:00:00.000 2009-01-03 23:59:59.000
2009-01-03 00:00:00.000 4 2009-01-03 00:00:00.000 2009-01-03 23:59:59.000
其含义如下:在 2009-01-02 00:00:00 有 3 个并发会话(3、1 和 2),并列出了它们各自的开始和结束时间。在 2009-01-03 00:00:00 出现并列的最大值,同样有 3 个并发会话(1、2 和 4),并列出了它们各自的开始和结束时间。
性能可能因情况而异。在 SQL Server 2005 中使用 CTE,这个查询可以大大简化。
答案 1 :(得分:2)
试试这个(它接近我想要的……)
-- Returns the ids of the events that are running at the busiest instant.
-- Candidate instants are all event start and end values; the one covered by
-- the most events (bounds inclusive) is picked with TOP 1.
SELECT DISTINCT
    ev.EventId
FROM EventTable AS ev
INNER JOIN (
    SELECT TOP 1
        boundary.RunDate,
        COUNT(*) AS DateCount
    FROM (
        -- UNION already removes duplicate instants.
        SELECT StartDate AS RunDate
        FROM EventTable
        UNION
        SELECT EndDate AS RunDate
        FROM EventTable
    ) AS boundary
    INNER JOIN EventTable AS running
        ON boundary.RunDate >= running.StartDate
        AND boundary.RunDate <= running.EndDate
    GROUP BY boundary.RunDate
    ORDER BY COUNT(*) DESC
) AS busiest
    ON busiest.RunDate >= ev.StartDate
    AND busiest.RunDate <= ev.EndDate
哦,如果您的日期中包含日期和时间,则将此处的所有日期替换为仅实际日期部分(剥离时间)
-- Day-granularity variant: returns the ids of the events that are running on
-- the busiest calendar day. Every start and end value is truncated to midnight
-- of its own day with DATEADD(day, DATEDIFF(day, 0, x), 0).
--
-- The end value is truncated to its own day, not rounded up to the next one:
-- with the inclusive BETWEEN below, rounding up would make an event that ends
-- on day D count as overlapping an event that starts on day D+1.
SELECT DISTINCT
    ev.EventId
FROM EventTable AS ev
INNER JOIN (
    SELECT TOP 1
        boundary.RunDate,
        COUNT(*) AS DateCount
    FROM (
        -- Candidate days: every day on which some event starts or ends.
        SELECT DATEADD(day, DATEDIFF(day, 0, StartDate), 0) AS RunDate
        FROM EventTable
        UNION
        SELECT DATEADD(day, DATEDIFF(day, 0, EndDate), 0) AS RunDate
        FROM EventTable
    ) AS boundary
    INNER JOIN EventTable AS running
        ON boundary.RunDate BETWEEN DATEADD(day, DATEDIFF(day, 0, running.StartDate), 0)
                                AND DATEADD(day, DATEDIFF(day, 0, running.EndDate), 0)
    GROUP BY boundary.RunDate
    ORDER BY COUNT(*) DESC
) AS busiest
    ON busiest.RunDate BETWEEN DATEADD(day, DATEDIFF(day, 0, ev.StartDate), 0)
                           AND DATEADD(day, DATEDIFF(day, 0, ev.EndDate), 0)
答案 2 :(得分:0)
另一种方法:
-- Computes the maximum number of events in @tbl that are running at the same
-- time and returns it as a single-value result set.
--
-- Events are visited in start order. @active holds the end times of the events
-- that are still running at the current start; its row count after adding the
-- current event is the concurrency at that instant. Comparing each start only
-- with the previous row's end is not sufficient: for A[1,3], B[2,5], C[4,6]
-- that would chain A-B-C and report 3, although A and C never overlap.
DECLARE @startdate DATETIME,
        @enddate DATETIME,
        @counter INT,
        @counter_max INT

DECLARE @active TABLE (enddate DATETIME)

DECLARE db_cursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT startdate, enddate
    FROM @tbl
    ORDER BY startdate, enddate

SET @counter = 0
SET @counter_max = 0

OPEN db_cursor
FETCH NEXT FROM db_cursor INTO @startdate, @enddate

WHILE @@FETCH_STATUS = 0
BEGIN
    -- An event that ended at or before this start is no longer running
    -- (an event starting exactly when another ends does not overlap it).
    DELETE FROM @active
    WHERE enddate <= @startdate

    INSERT INTO @active (enddate)
    VALUES (@enddate)

    SELECT @counter = COUNT(*)
    FROM @active

    IF @counter > @counter_max
    BEGIN
        SET @counter_max = @counter
    END

    FETCH NEXT FROM db_cursor INTO @startdate, @enddate
END

CLOSE db_cursor
DEALLOCATE db_cursor

SELECT @counter_max
答案 3 :(得分:0)
这个很简短,易于理解并且工作正常:
-- FindEvents: returns, as a single-value result set, the maximum number of
-- rows in the dates table whose [startdate, enddate] periods are running at
-- the same time.
--
-- Every row contributes a +1 change at its startdate and a -1 change at its
-- enddate; the changes are replayed in time order while tracking the running
-- total and its maximum.
CREATE PROCEDURE FindEvents
AS
BEGIN
    -- UNION ALL is required: plain UNION would collapse identical
    -- (thedate, change) pairs, so events sharing a start (or end) instant
    -- would be counted only once.
    -- Ordering by change after thedate replays ends (-1) before starts (+1)
    -- at the same instant, so an event that starts exactly when another ends
    -- is not counted as concurrent with it. As a consequence, an event whose
    -- startdate equals its enddate never raises the maximum.
    DECLARE dates_cursor CURSOR LOCAL FAST_FORWARD FOR
        SELECT
            startdate AS thedate,
            1 AS change
        FROM dates
        UNION ALL
        SELECT
            enddate AS thedate,
            -1 AS change
        FROM dates
        ORDER BY
            thedate ASC,
            change ASC;

    DECLARE @max INT;
    DECLARE @thedate DATETIME;
    DECLARE @change INT;
    DECLARE @current INT;

    SET @max = 0;
    SET @current = 0;

    OPEN dates_cursor
    FETCH NEXT FROM dates_cursor INTO @thedate, @change

    WHILE @@FETCH_STATUS = 0
    BEGIN
        SET @current = @current + @change;

        IF (@current > @max)
        BEGIN
            SET @max = @current;
        END

        FETCH NEXT FROM dates_cursor INTO @thedate, @change
    END

    CLOSE dates_cursor
    DEALLOCATE dates_cursor

    SELECT @max;
END