我需要计算按小时消耗的资源量。例如,考虑电话通话时间。对于以下记录,我想生成以下结果:
Call_ID StartDateTime EndDateTime
1 3/15/2014 11:25 3/15/2014 14:01
2 3/15/2014 11:50 3/15/2014 13:10
3 3/15/2014 12:05 3/15/2014 12:55
4 3/15/2014 13:04 3/15/2014 15:02
5 3/15/2014 13:15 3/15/2014 14:22
6 3/15/2014 14:35 3/15/2014 15:18
7 3/15/2014 15:10 3/15/2014 15:29
Date Hour CallMinutes
3/15/2014 11 45
3/15/2014 12 170
3/15/2014 13 171
3/15/2014 14 108
3/15/2014 15 39
我已经有能做到这一点的SQL,但它使用了游标,我想找到一个更快的替代方案。这是我目前可以正常运行的代码:
/* Create and fill the PhoneCalls input table: one row per call, bounded by
   StartDateTime and EndDateTime. */
CREATE TABLE PhoneCalls(
    Call_ID int IDENTITY(1,1) NOT NULL,
    StartDateTime datetime NOT NULL,
    EndDateTime datetime NOT NULL,
    CONSTRAINT PK_PhoneCalls PRIMARY KEY CLUSTERED (Call_ID ASC) ON [PRIMARY]
) ON [PRIMARY];
-- The literals use the ISO 8601 form (yyyy-mm-ddThh:mi:ss), which is interpreted
-- the same way under every SET LANGUAGE / SET DATEFORMAT setting. The previous
-- 'm/d/yyyy hh:mi' strings fail or are misread when the session uses dmy order.
-- One INSERT per row keeps the IDENTITY values in the listed order (1..7).
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T11:25:00', '2014-03-15T14:01:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T11:50:00', '2014-03-15T13:10:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T12:05:00', '2014-03-15T12:55:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T13:04:00', '2014-03-15T15:02:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T13:15:00', '2014-03-15T14:22:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T14:35:00', '2014-03-15T15:18:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T15:10:00', '2014-03-15T15:29:00');
/* Scratch table for the intermediate results:
   one row per call and per hour slice, with the slice's minutes in CallTime. */
CREATE TABLE #HourlyCallTimes
(
    Call_ID  int      NOT NULL,
    CallDate date     NOT NULL,
    CallHour smallint NOT NULL,
    CallTime int      NOT NULL
);
/* Walk every call with a cursor and split it into per-hour slices,
   writing one #HourlyCallTimes row per slice. */
SET NOCOUNT ON;

DECLARE @CurCallId   int,
        @SliceStart  datetime,
        @CallEnd     datetime,
        @MinutesLeft int,
        @SliceHour   int,
        @SliceMins   int;

DECLARE CallsCursor CURSOR FOR
    SELECT Call_ID,
           StartDateTime,
           EndDateTime,
           DATEDIFF(MINUTE, StartDateTime, EndDateTime) AS CallLength
    FROM PhoneCalls;

OPEN CallsCursor;
FETCH NEXT FROM CallsCursor INTO @CurCallId, @SliceStart, @CallEnd, @MinutesLeft;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Only calls whose length in minutes is positive produce any rows
    IF (@MinutesLeft >= 1)
    BEGIN
        -- First slice: from the call start to the end of that clock hour,
        -- capped at the total call length when the call ends earlier.
        SET @SliceHour = DATEPART(HOUR, @SliceStart);
        SET @SliceMins = CASE
                             WHEN @MinutesLeft < 60 - DATEPART(MINUTE, @SliceStart)
                                 THEN @MinutesLeft
                             ELSE 60 - DATEPART(MINUTE, @SliceStart)
                         END;

        INSERT INTO #HourlyCallTimes (Call_ID, CallDate, CallHour, CallTime)
        SELECT @CurCallId, CAST(@SliceStart AS date), @SliceHour, @SliceMins;

        -- Advance to the top of the next hour and deduct the minutes just recorded
        SET @MinutesLeft = @MinutesLeft - @SliceMins;
        SET @SliceStart  = DATEADD(HOUR, @SliceHour + 1, CAST(CAST(@SliceStart AS date) AS datetime));

        -- Remaining slices: a full 60 minutes each, except the last one,
        -- which takes whatever minutes are still left.
        WHILE @SliceStart < @CallEnd
        BEGIN
            SET @SliceHour = DATEPART(HOUR, @SliceStart);
            SET @SliceMins = CASE WHEN @MinutesLeft < 60 THEN @MinutesLeft ELSE 60 END;

            INSERT INTO #HourlyCallTimes (Call_ID, CallDate, CallHour, CallTime)
            SELECT @CurCallId, CAST(@SliceStart AS date), @SliceHour, @SliceMins;

            SET @MinutesLeft = @MinutesLeft - @SliceMins;
            SET @SliceStart  = DATEADD(HOUR, 1, @SliceStart);
        END
    END

    FETCH NEXT FROM CallsCursor INTO @CurCallId, @SliceStart, @CallEnd, @MinutesLeft;
END

CLOSE CallsCursor;
DEALLOCATE CallsCursor;
/* Final results table: total call minutes per date and hour,
   aggregated from the per-call slices in #HourlyCallTimes. */
CREATE TABLE HourlyCallTotals (
    CallDate date NOT NULL,
    CallHour smallint NOT NULL,
    CallTime int NOT NULL);
INSERT INTO HourlyCallTotals (CallDate, CallHour, CallTime)
SELECT CallDate, CallHour, SUM(CallTime)
FROM #HourlyCallTimes
GROUP BY CallDate, CallHour;
-- The working tables are no longer needed once the totals are stored
DROP TABLE #HourlyCallTimes;
DROP TABLE PhoneCalls;
-- Explicit column list and ORDER BY: without ORDER BY the engine may return the
-- rows in any order, so the output would not reliably match the listing below.
SELECT CallDate, CallHour, CallTime
FROM HourlyCallTotals
ORDER BY CallDate, CallHour;
DROP TABLE HourlyCallTotals;
/* expected Results */
/* CallDate CallHour CallTime
3/15/2014 11 45
3/15/2014 12 170
3/15/2014 13 171
3/15/2014 14 108
3/15/2014 15 39 */
答案 0 :(得分:1)
您可以尝试使用此递归CTE来获得所需的结果:
/*
  Splits every call into one row per clock hour it touches, then sums the
  minutes per date and hour.

  - Anchor member: the first (possibly partial) hour of each call.
  - Recursive member: continues from CalcStartTime, one hour slice per level,
    until CalcStartTime reaches EndDateTime.

  CalcStartTime must advance by exactly the minutes counted in CallTime, so both
  CASE expressions use the same condition. Previously the CalcStartTime CASE
  lacked the same-hour test: a call shorter than 60 minutes that crossed an hour
  boundary (e.g. 14:35-15:18) jumped straight to EndDateTime and its minutes in
  the following hour were never counted.

  Note: recursion depth is one level per hour of a call; the server's default
  limit of 100 levels applies unless OPTION (MAXRECURSION n) is added.
*/
;WITH cte_break AS
(
    SELECT
        CONVERT(DATE, StartDateTime) AS CallDate,
        DATEPART(HOUR, StartDateTime) AS CallHour,
        -- Minutes inside the starting hour: the whole call when it ends in that
        -- same hour, otherwise the minutes left until the hour ends.
        CASE WHEN DATEDIFF(MINUTE, StartDateTime, EndDateTime) < 60
                  AND DATEPART(HOUR, StartDateTime) = DATEPART(HOUR, EndDateTime)
             THEN DATEDIFF(MINUTE, StartDateTime, EndDateTime)
             ELSE (60 - DATEPART(MINUTE, StartDateTime)) END AS CallTime,
        -- Start of the next slice = this slice's start + the minutes counted above
        DATEADD(MINUTE,
                CASE WHEN DATEDIFF(MINUTE, StartDateTime, EndDateTime) < 60
                          AND DATEPART(HOUR, StartDateTime) = DATEPART(HOUR, EndDateTime)
                     THEN DATEDIFF(MINUTE, StartDateTime, EndDateTime)
                     ELSE (60 - DATEPART(MINUTE, StartDateTime)) END,
                StartDateTime) AS CalcStartTime,
        EndDateTime
    FROM dbo.PhoneCalls
    UNION ALL
    SELECT
        CONVERT(DATE, CalcStartTime) AS CallDate,
        DATEPART(HOUR, CalcStartTime) AS CallHour,
        CASE WHEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime) < 60
                  AND DATEPART(HOUR, CalcStartTime) = DATEPART(HOUR, EndDateTime)
             THEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime)
             ELSE (60 - DATEPART(MINUTE, CalcStartTime)) END AS CallTime,
        DATEADD(MINUTE,
                CASE WHEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime) < 60
                          AND DATEPART(HOUR, CalcStartTime) = DATEPART(HOUR, EndDateTime)
                     THEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime)
                     ELSE (60 - DATEPART(MINUTE, CalcStartTime)) END,
                CalcStartTime) AS CalcStartTime,
        EndDateTime
    FROM cte_break
    -- Stop once the slice start has reached the end of the call
    WHERE CalcStartTime < EndDateTime
)
SELECT CallDate, CallHour, SUM(CallTime) AS CallTime
FROM cte_break
GROUP BY CallDate, CallHour
ORDER BY CallDate, CallHour
答案 1 :(得分:0)
要摆脱游标/循环:不要逐个通话地沿时间推进来计算每小时的通话时长,而是先列出所有可能的小时(从最小的开始小时 MIN(StartHour) 到最大的结束小时 MAX(EndHour)),再对每个通话、每个小时分别计算该通话在该小时内的通话分钟数。可以把它想象成一个矩阵,其中列是小时,行是通话,只需将各列相加即可得到每小时的通话时间。
ID Hour0 Hour1 Hour2 Hour3 ...
1 0 0 4 0
2 0 0 3 0
3 0 1 2 0
...
现在,需要进行更多的计数/计算(最多比循环方法多23倍),但从循环改为基于集合的逻辑所带来的好处远远超过这一成本。
这是部分解决方案(不能处理不同的日期):
-- Set-based partial solution: builds the list of hours covered by the data,
-- cross joins it with the calls, and computes each call's minutes in each hour.
-- Only the hour of day is considered, so calls on different dates (or calls
-- crossing midnight) are not handled.
DECLARE @maxHour INT, @minHour INT, @disHours INT;

SELECT @minHour = MIN(DATEPART(HOUR, StartDateTime)),
       @maxHour = MAX(DATEPART(HOUR, EndDateTime))
FROM PhoneCalls;

-- Computed in its own statement: when one SELECT assigns several variables, the
-- order in which the assignments are evaluated is not guaranteed, so reading
-- @maxHour/@minHour inside the SELECT that sets them is unreliable.
SET @disHours = @maxHour - @minHour + 1;

-- Filling out a range from @minHour to @maxHour
-- (COALESCE keeps TOP valid when PhoneCalls is empty and the variables are NULL)
SELECT TOP (COALESCE(@disHours, 0))
       @minHour - 1 + ROW_NUMBER() OVER (ORDER BY [object_id]) AS CallHour
INTO #hours
FROM sys.all_objects
ORDER BY CallHour;

-- Count the per call per hour call time
SELECT CallHour,
       CASE
           -- call covers the whole hour
           WHEN DATEPART(HOUR, StartDateTime) < CallHour AND DATEPART(HOUR, EndDateTime) > CallHour THEN 60
           -- call starts and ends inside this hour
           WHEN DATEPART(HOUR, StartDateTime) = CallHour AND DATEPART(HOUR, EndDateTime) = CallHour THEN DATEPART(MINUTE, EndDateTime) - DATEPART(MINUTE, StartDateTime)
           -- call started earlier and ends inside this hour
           WHEN DATEPART(HOUR, StartDateTime) < CallHour AND DATEPART(HOUR, EndDateTime) = CallHour THEN DATEPART(MINUTE, EndDateTime)
           -- call starts inside this hour and ends later
           WHEN DATEPART(HOUR, StartDateTime) = CallHour AND DATEPART(HOUR, EndDateTime) > CallHour THEN 60 - DATEPART(MINUTE, StartDateTime)
           -- call does not touch this hour
           ELSE 0
       END AS CallTimePerCallPerHour
INTO #cross
FROM PhoneCalls
CROSS JOIN #hours;

-- The final result
SELECT CallHour,
       SUM(CallTimePerCallPerHour) AS CallTime
FROM #cross
WHERE CallTimePerCallPerHour > 0 -- Leaves out hours with 0 CallTime in the result
GROUP BY CallHour
ORDER BY CallHour; -- deterministic output order

DROP TABLE #hours;
DROP TABLE #cross;
对于大约1M行,随机开始/结束时间从00:00到23:59(基于集合的方法的最坏情况),运行时间从90秒降到20秒。
至于不同的日期,循环日期可能仍然是一个好主意,否则中间表会变得太大。
此外,可能会在不同日期开始和结束通话。处理这个问题可能很棘手,但是因为它们无论如何都会成为少数,所以可能只需要添加一个预处理步骤来挑选它们并将它们分开:
3/15/2016 23:49 - 3/16/2016 00:05 =>
3/15/2016 23:49 - 3/15/2016 23:60 // instead of 24:00 so we don't need to count hour 24
3/16/2016 00:00 - 3/16/2016 00:05