用于计算每小时金额的SQL语句

时间:2016-03-15 23:10:43

标签: sql sql-server tsql cursor

我需要计算按小时消耗的资源量。例如,考虑电话通话时间。对于以下记录,我想生成以下结果:

Call_ID   StartDateTime     EndDateTime  
1         3/15/2014 11:25   3/15/2014 14:01  
2         3/15/2014 11:50   3/15/2014 13:10  
3         3/15/2014 12:05   3/15/2014 12:55  
4         3/15/2014 13:04   3/15/2014 15:02  
5         3/15/2014 13:15   3/15/2014 14:22  
6         3/15/2014 14:35   3/15/2014 15:18  
7         3/15/2014 15:10   3/15/2014 15:29  

Date        Hour    CallMinutes  
3/15/2014   11       45  
3/15/2014   12       170  
3/15/2014   13       171  
3/15/2014   14       108  
3/15/2014   15       39  

我已经有能够实现这一点的 SQL,但它使用了游标,我想找到一个更快的替代方案。下面是我目前可以正常运行的代码:

/* Create and fill the input table.
   Date literals use the ISO 8601 form (yyyy-mm-ddThh:mi:ss), which SQL Server
   interprets the same way regardless of the session's LANGUAGE / DATEFORMAT
   settings; a literal such as '3/15/2014 11:25' fails to convert when the
   session date format is dmy. */
CREATE TABLE PhoneCalls(
    Call_ID         int IDENTITY(1,1) NOT NULL,
    StartDateTime   datetime NOT NULL,
    EndDateTime     datetime NOT NULL,
 CONSTRAINT PK_PhoneCalls PRIMARY KEY CLUSTERED ( Call_ID ASC) ON [PRIMARY]
) ON [PRIMARY];

-- One INSERT per call, executed in order, so Call_ID runs from 1 to 7 in this order
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T11:25:00', '2014-03-15T14:01:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T11:50:00', '2014-03-15T13:10:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T12:05:00', '2014-03-15T12:55:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T13:04:00', '2014-03-15T15:02:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T13:15:00', '2014-03-15T14:22:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T14:35:00', '2014-03-15T15:18:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T15:10:00', '2014-03-15T15:29:00');

/* Temp table for the intermediate results:
   the minutes of a call that are attributed to one date/hour. */
CREATE TABLE #HourlyCallTimes
(
    Call_ID  int      NOT NULL,
    CallDate date     NOT NULL,
    CallHour smallint NOT NULL,
    CallTime int      NOT NULL
);

/* Determine hourly totals of CallTime using a cursor.
   For every call, one row per clock hour the call touches is written to
   #HourlyCallTimes: first the (possibly partial) starting hour, then each
   following hour until @StartDt reaches the end of the call. */
SET NOCOUNT ON;
DECLARE @Call_ID int, @StartDt datetime, @EndDt datetime, @CallLength int, @Hour int, @CallTime int;

-- LOCAL: the cursor goes out of scope with the batch, so a run that fails
--        part-way does not leave a global cursor named CallsCursor behind.
-- FAST_FORWARD: forward-only, read-only; the rows are fetched once, in order,
--        and never updated through the cursor.
DECLARE CallsCursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT Call_ID, StartDateTime, EndDateTime, DATEDIFF(minute, StartDateTime, EndDateTime) AS CallLength
    FROM PhoneCalls;
OPEN CallsCursor;
FETCH NEXT FROM CallsCursor INTO @Call_ID, @StartDt, @EndDt, @CallLength;
WHILE @@FETCH_STATUS = 0
BEGIN
    /* make sure that the call was at least a minute long
       (@CallLength is a whole number of minutes, so > 0 is sufficient) */
    IF (@CallLength > 0)
    BEGIN
        -- Record the call time for the first partial hour
        SET @Hour = DATEPART(Hour, @StartDt);
        SET @CallTime = 60 - DATEPART(Minute, @StartDt);
        IF (@CallLength < @CallTime) -- the call ends within this same hour
            SET @CallTime = @CallLength;
        INSERT INTO #HourlyCallTimes (Call_ID, CallDate, CallHour, CallTime)
            VALUES (@Call_ID, CAST(@StartDt AS DATE), @Hour, @CallTime);

        -- Move to the beginning of the next hour: midnight of the start date
        -- plus (@Hour + 1) hours, which rolls over to the next day after hour 23
        SET @StartDt = DATEADD(HOUR, @Hour + 1, CAST(CAST(@StartDt AS DATE) AS DateTime));
        SET @CallLength = @CallLength - @CallTime;

        WHILE @StartDt < @EndDt
        BEGIN
            SET @Hour = DATEPART(Hour, @StartDt);
            SET @CallTime = 60;
            IF (@CallLength < @CallTime)  -- check if this is the last (partial) hour
                SET @CallTime = @CallLength;
            INSERT INTO #HourlyCallTimes (Call_ID, CallDate, CallHour, CallTime)
                VALUES (@Call_ID, CAST(@StartDt AS DATE), @Hour, @CallTime);

            -- move to the beginning of the next hour
            SET @StartDt = DATEADD(Hour, 1, @StartDt);
            SET @CallLength = @CallLength - @CallTime;
        END
    END
    FETCH NEXT FROM CallsCursor INTO @Call_ID, @StartDt, @EndDt, @CallLength;
END
CLOSE CallsCursor;
DEALLOCATE CallsCursor;

/* Final results table: call minutes summed per date and hour */
CREATE TABLE HourlyCallTotals (
    CallDate    date NOT NULL,
    CallHour    smallint NOT NULL,
    CallTime    int NOT NULL);

INSERT INTO HourlyCallTotals (CallDate, CallHour, CallTime)
    SELECT CallDate, CallHour, SUM(CallTime)
    FROM #HourlyCallTimes
    GROUP BY CallDate, CallHour;

DROP TABLE #HourlyCallTimes;
DROP TABLE PhoneCalls;

-- Explicit column list and ORDER BY: without ORDER BY the order in which
-- rows are returned is not guaranteed, and the expected output below is
-- listed by date and hour.
SELECT CallDate, CallHour, CallTime
FROM HourlyCallTotals
ORDER BY CallDate, CallHour;
DROP TABLE HourlyCallTotals;

/* expected Results */
/*  CallDate    CallHour    CallTime
    3/15/2014       11          45
    3/15/2014       12          170
    3/15/2014       13          171
    3/15/2014       14          108
    3/15/2014       15          39 */

2 个答案:

答案 0 :(得分:1)

您可以尝试使用此递归CTE来获得所需的结果:

/* Splits every call into one row per clock hour with a recursive CTE and
   sums the minutes per date and hour.

   CallTime      = minutes of the call that fall into the current hour
   CalcStartTime = where the next segment of the call starts

   CalcStartTime must advance by exactly the minutes counted in CallTime, so
   both use the same CASE. If the same-hour test is missing from the
   CalcStartTime CASE, a call shorter than 60 minutes that crosses an hour
   boundary (e.g. 14:35 - 15:18) jumps straight to EndDateTime and its minutes
   in the second hour are never counted.

   NOTE: recursion depth is one level per hour segment after the first; the
   default MAXRECURSION limit of 100 applies to very long calls. */
;WITH cte_break AS
(
    -- Anchor member: the first (possibly partial) hour of each call
    SELECT
        CONVERT(DATE, StartDateTime) AS CallDate,
        DATEPART(HOUR, StartDateTime) AS CallHour,
        CASE WHEN DATEDIFF(MINUTE, StartDateTime, EndDateTime) < 60
                  AND DATEPART(HOUR, StartDateTime) = DATEPART(HOUR, EndDateTime)
             THEN DATEDIFF(MINUTE, StartDateTime, EndDateTime)   -- call ends within this hour
             ELSE (60 - DATEPART(MINUTE, StartDateTime))         -- rest of this hour
        END AS CallTime,
        DATEADD(MINUTE,
                CASE WHEN DATEDIFF(MINUTE, StartDateTime, EndDateTime) < 60
                          AND DATEPART(HOUR, StartDateTime) = DATEPART(HOUR, EndDateTime)
                     THEN DATEDIFF(MINUTE, StartDateTime, EndDateTime)
                     ELSE (60 - DATEPART(MINUTE, StartDateTime))
                END,
                StartDateTime) AS CalcStartTime,
        EndDateTime
    FROM dbo.PhoneCalls

    UNION ALL

    -- Recursive member: the next hour segment, as long as the call has not ended
    SELECT
        CONVERT(DATE, CalcStartTime) AS CallDate,
        DATEPART(HOUR, CalcStartTime) AS CallHour,
        CASE WHEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime) < 60
                  AND DATEPART(HOUR, CalcStartTime) = DATEPART(HOUR, cte_break.EndDateTime)
             THEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime)
             ELSE (60 - DATEPART(MINUTE, CalcStartTime))
        END AS CallTime,
        DATEADD(MINUTE,
                CASE WHEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime) < 60
                          AND DATEPART(HOUR, CalcStartTime) = DATEPART(HOUR, cte_break.EndDateTime)
                     THEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime)
                     ELSE (60 - DATEPART(MINUTE, CalcStartTime))
                END,
                CalcStartTime) AS CalcStartTime,
        EndDateTime
    FROM cte_break
    WHERE CalcStartTime < EndDateTime
)
SELECT CallDate, CallHour, SUM(CallTime) AS CallTime
FROM cte_break
GROUP BY CallDate, CallHour
ORDER BY CallDate, CallHour;

答案 1 :(得分:0)

要摆脱游标/循环:不要逐个通话、逐个小时地去拆分通话时间,而是列出所有可能的小时(从最小的开始小时 StartHour 到最大的结束小时 EndHour),并计算每个通话在每个小时内的通话时间。可以把它想象成一个矩阵,其中列是小时,行是通话,只需将各列分别相加即可得到每小时的通话时间。

ID Hour0 Hour1 Hour2 Hour3 ...
1 0 0 4 0
2 0 0 3 0
3 0 1 2 0
...

这样一来,需要进行更多的计数/计算(最多是循环方法的 23 倍),但从循环改为基于集合的逻辑所带来的好处远远超过这一成本。

Procedural vs Set-Based SQL

这是部分解决方案(不能处理不同的日期):

/* Set-based partial solution: cross join every call with every hour in the
   observed range, compute the minutes of each call that fall into each hour,
   and sum per hour.
   Limitation: only the hour-of-day is compared, so all calls are assumed to
   start and end on the same date. */
DECLARE @maxHour INT, @minHour INT, @disHours INT;

SELECT  @minHour = MIN(DATEPART(HOUR, StartDateTime)),
        @maxHour = MAX(DATEPART(HOUR, EndDateTime))
FROM    PhoneCalls;

-- Computed in its own statement: when one SELECT contains several variable
-- assignments that reference each other, SQL Server does not guarantee the
-- order in which they are evaluated.
-- COALESCE: with an empty PhoneCalls table both bounds are NULL; use 0 so
-- TOP receives a valid row count and #hours is simply empty.
SET @disHours = COALESCE(@maxHour - @minHour + 1, 0);

-- Filling out a range from @minHour to @maxHour
-- (sys.all_objects only serves as a row source for ROW_NUMBER)
SELECT TOP (@disHours) @minHour - 1 + ROW_NUMBER() OVER (ORDER BY [object_id]) AS CallHour
INTO    #hours
FROM    sys.all_objects
ORDER BY CallHour;

-- Count the per call per hour call time
SELECT  CallHour,
    CASE
        -- call covers the whole hour
        WHEN DATEPART(HOUR, StartDateTime) < CallHour AND DATEPART(HOUR, EndDateTime) > CallHour THEN 60
        -- call starts and ends within this hour
        WHEN DATEPART(HOUR, StartDateTime) = CallHour AND DATEPART(HOUR, EndDateTime) = CallHour THEN DATEPART(MINUTE, EndDateTime) - DATEPART(MINUTE, StartDateTime)
        -- call started earlier and ends in this hour
        WHEN DATEPART(HOUR, StartDateTime) < CallHour AND DATEPART(HOUR, EndDateTime) = CallHour THEN DATEPART(MINUTE, EndDateTime)
        -- call starts in this hour and ends later
        WHEN DATEPART(HOUR, StartDateTime) = CallHour AND DATEPART(HOUR, EndDateTime) > CallHour THEN 60 - DATEPART(MINUTE, StartDateTime)
        -- call does not touch this hour
        ELSE 0
    END     AS CallTimePerCallPerHour
INTO    #cross
FROM    PhoneCalls
    CROSS JOIN #hours;

-- The final result
SELECT  CallHour,
    SUM(CallTimePerCallPerHour) AS CallTime
FROM    #cross
WHERE   CallTimePerCallPerHour > 0  -- leave out hours with 0 CallTime in the result
GROUP BY CallHour
ORDER BY CallHour;                  -- row order is not guaranteed without ORDER BY

DROP TABLE #hours;
DROP TABLE #cross;

对于大约1M行,随机开始/结束时间从00:00到23:59(基于集合的方法的最坏情况),运行时间从90秒降到20秒。

至于不同的日期,循环日期可能仍然是一个好主意,否则中间表会变得太大。

此外,可能会在不同日期开始和结束通话。处理这个问题可能很棘手,但是因为它们无论如何都会成为少数,所以可能只需要添加一个预处理步骤来挑选它们并将它们分开:

3/15/2016 23:49 - 3/16/2016 00:05 =>
3/15/2016 23:49 - 3/15/2016 23:60 // instead of 24:00 so we don't need to count hour 24
3/16/2016 00:00 - 3/16/2016 00:05