SQL Server 2016-24小时滑动窗口中的“运行计数和总和”

时间:2018-12-24 18:39:29

标签: sql-server tsql running-total

我试图计算24小时滑动窗口中的订单。我有一个'datetime'字段,并以分钟为单位计算24小时窗口内的汇总。每当两个连续订单之间的间隔超过1440分钟,或连续订单的累计运行时间超过1440分钟时,就应该重新开始计数。


环境是SQL Server 2016,我可以创建临时表,但不能创建物理表,也不能创建内存优化的对象(我想2012+上的任何东西都应该起作用)。
我在同一张表上尝试了内部联接,并用递归CTE、ROW_NUMBER等做过测试,但问题在于:24小时窗口内的行数从来不是固定的,而且开始计数的基准时间也会随时间段的不同而变化。我唯一的常量是24小时的时间跨度。 尝试了以下内容:
https://www.red-gate.com/simple-talk/sql/t-sql-programming/calculating-values-within-a-rolling-window-in-transact-sql/
Calculate running total / running balance

CROSS APPLY似乎在大多数情况下都有效,但是在某些情况下(计算运行的24小时窗口时)并非如此。我尝试通过多种方式更改WHERE子句中的日期时间条件,但仍然不知道如何使其正常工作。
我曾想过要在https://blog.jooq.org/2015/05/12/use-this-neat-window-function-trick-to-calculate-time-differences-in-a-time-series/所示的24小时标记处创建一个重置事件,但此时我的大脑正在融化,甚至连逻辑都弄不清楚。

-- Sample data for the 24-hour sliding-window question.
-- Rebuilds the session-scoped temp table #Data with one row per order.
DROP TABLE IF EXISTS #Data;

CREATE TABLE #Data
(
    START_TIME          DATETIME            -- order timestamp; every sample value falls on a whole minute
    ,ORDER_ID           NUMERIC(18,0)
    ,PROD_ID            NUMERIC(18,0)
    ,ACC_ID             NUMERIC(18,0)
);

-- The explicit column list keeps the load correct if the table definition is
-- ever reordered or extended.
-- Timestamps are written as ISO 8601 ('YYYY-MM-DDThh:mm:ss.mmm') because the
-- space-separated form is interpreted according to SET LANGUAGE / SET DATEFORMAT
-- when converted to DATETIME and can fail or swap day and month.
INSERT INTO #Data (START_TIME, ORDER_ID, PROD_ID, ACC_ID)
VALUES
     ('2018-06-22T11:00:00.000',  198151606, 58666, 1601554883)
    ,('2018-07-09T10:15:00.000', 2008873061, 58666, 1601554883)
    ,('2018-07-09T12:33:00.000', 2009269222, 58666, 1601554883)
    ,('2018-07-10T08:29:00.000', 2010735393, 58666, 1601554883)
    ,('2018-07-10T10:57:00.000', 2010735584, 58666, 1601554883)
    ,('2018-06-27T23:53:00.000', 1991467555, 58666, 2300231016)
    ,('2018-06-28T00:44:00.000', 1991583916, 58666, 2300231016)
    ,('2018-07-04T04:15:00.000', 2001154497, 58666, 2300231016)
    ,('2018-07-04T15:44:00.000', 2001154818, 58666, 2300231016)
    ,('2018-07-04T21:30:00.000', 2002057919, 58666, 2300231016)
    ,('2018-07-05T02:09:00.000', 1200205808, 58666, 2300231016)
    ,('2018-07-05T04:15:00.000', 2200205814, 58666, 2300231016)
    ,('2018-07-05T17:23:00.000', 3200370070, 58666, 2300231016)
    ,('2018-07-05T18:07:00.000', 4200370093, 58666, 2300231016)
    ,('2018-07-06T20:15:00.000', 5200571962, 58666, 2300231016)
    ,('2018-07-07T07:45:00.000', 6200571987, 58666, 2300231016)
    ,('2018-07-07T12:13:00.000', 7200571993, 58666, 2300231016)
    ,('2018-07-09T18:29:00.000', 8200939551, 58666, 2300231016)
    ,('2018-07-09T21:05:00.000', 9200939552, 58666, 2300231016)
    ,('2018-07-11T21:31:00.000', 2011107311, 58666, 2300231016)
    ,('2018-06-27T18:23:00.000', 1991016382, 58669, 2300231016)
    ,('2018-06-27T19:07:00.000', 1991181363, 58669, 2300231016)
    ,('2018-06-27T19:28:00.000', 1991181374, 58669, 2300231016)
    ,('2018-06-28T01:44:00.000', 1991583925, 58669, 2300231016)
    ,('2018-06-28T02:19:00.000', 1991583946, 58669, 2300231016)
    ,('2018-07-03T10:15:00.000', 1999231747, 58669, 2300231016)
    ,('2018-07-03T10:45:00.000', 2000293678, 58669, 2300231016)
    ,('2018-07-03T14:22:00.000',  200029380, 58669, 2300231016)
    ,('2018-07-04T19:45:00.000', 2002057789, 58669, 2300231016)
    ,('2018-07-04T21:00:00.000', 1200205781, 58669, 2300231016)
    ,('2018-07-05T15:12:00.000', 2200254833, 58669, 2300231016)
    ,('2018-07-05T17:52:00.000', 3200370071, 58669, 2300231016)
    ,('2018-07-09T22:30:00.000', 4200939553, 58669, 2300231016)
    ,('2018-07-09T23:23:00.000', 5200939566, 58669, 2300231016)
    ,('2018-07-30T17:45:00.000', 6204364207, 58666, 2300231016)
    ,('2018-07-30T23:30:00.000', 7204364211, 58666, 2300231016);


-- Asker's attempt (kept behaviourally as posted; it does not yet restart the
-- window the way the question wants).
-- For every order it reports the gap to the previous order of the same
-- account/product, a running total of those gaps, and a CROSS APPLY look-back
-- over the orders placed in the 1440 minutes up to and including the order.
;WITH TimeBetween AS (
    SELECT
        Gaps.ACC_ID
        ,Gaps.PROD_ID
        ,Gaps.ORDER_ID
        ,Gaps.START_TIME
        -- Gaps of 24 hours or more, and the first order of a pair (no previous
        -- row, so the gap is NULL), are both reported as 0.
        ,TIME_BETWEEN_ORDERS = CASE
                                   WHEN Gaps.MINUTES_SINCE_PREVIOUS < 1440 THEN Gaps.MINUTES_SINCE_PREVIOUS
                                   ELSE 0
                               END
    FROM (
        SELECT
            ACC_ID
            ,PROD_ID
            ,ORDER_ID
            ,START_TIME
            ,MINUTES_SINCE_PREVIOUS = DATEDIFF(MINUTE
                                              ,LAG(START_TIME) OVER (PARTITION BY ACC_ID, PROD_ID ORDER BY START_TIME)
                                              ,START_TIME)
        FROM #Data
    ) AS Gaps
)
SELECT
    Orders.ACC_ID
    ,Orders.PROD_ID
    ,Orders.ORDER_ID
    ,Orders.START_TIME
    ,Orders.TIME_BETWEEN_ORDERS

    --Not working correctly, repeats the previous time at the end of the window when it should be 0.
    ,RUNNING_TIME_BETWEEN_ORDERS = SUM(Orders.TIME_BETWEEN_ORDERS)
                                       OVER (PARTITION BY Orders.ACC_ID, Orders.PROD_ID
                                             ORDER BY Orders.START_TIME)

    ,Running24h.RUNNING_COUNT_24h
    ,Running24h.RUNNING_TIME_BETWEEN_ORDERS_Apply
    ,Running24h.START_TIME_Apply
FROM TimeBetween AS Orders
CROSS APPLY (
    SELECT
        -- Number of orders inside the look-back window.
        RUNNING_COUNT_24h                  = COUNT(*)
        -- Minutes from the earliest order in the window to the current order.
        ,RUNNING_TIME_BETWEEN_ORDERS_Apply = DATEDIFF(MINUTE, MIN(Lookback.START_TIME), Orders.START_TIME)
        -- Earliest order in the window, i.e. the anchor the APPLY is using.
        ,START_TIME_Apply                  = MIN(Lookback.START_TIME)
    FROM #Data AS Lookback
    WHERE Lookback.ACC_ID = Orders.ACC_ID
      AND Lookback.PROD_ID = Orders.PROD_ID
      AND Lookback.START_TIME > DATEADD(MINUTE, -1440, Orders.START_TIME)
      AND Lookback.START_TIME <= Orders.START_TIME
    -- Yield no row at all when the window is empty, so CROSS APPLY drops the
    -- outer row exactly as a TOP 1 over an empty set would.
    HAVING COUNT(*) > 0
) AS Running24h
ORDER BY
    Orders.ACC_ID
    ,Orders.PROD_ID
    ,Orders.START_TIME

当订单之间的运行时间超过24小时时,运行计数应从1重新开始。 当前,它会重复最后一个值,并且用于计算的时间似乎不正确。

Current result from CROSS APPLY with notes on where it's not working and what it should be for what I'm trying to achieve

2 个答案:

答案 0 :(得分:0)

首先创建一个Numbers表,其中包含的行数至少要与您要处理的最大时间范围内的分钟数一样

-- Builds dbo.Numbers, a tally table holding the integers 1 .. 10,000,000.
-- NOTE(review): this is a permanent table; the question states that physical
-- tables cannot be created in the asker's environment, so a temp table of the
-- same shape (and a matching change in the consuming query) would be needed there.
CREATE TABLE dbo.Numbers(Number INT PRIMARY KEY);

-- E1..E8 multiply a 10-row seed through explicit cross joins; no base table is read.
WITH E1(N) AS
(
    SELECT 1
    FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS Seed(N)
)                                                       -- 1*10^1 or 10 rows
, E2(N) AS (SELECT 1 FROM E1 AS a CROSS JOIN E1 AS b)   -- 1*10^2 or 100 rows
, E4(N) AS (SELECT 1 FROM E2 AS a CROSS JOIN E2 AS b)   -- 1*10^4 or 10,000 rows
, E8(N) AS (SELECT 1 FROM E4 AS a CROSS JOIN E4 AS b)   -- 1*10^8 or 100,000,000 rows
-- Number the generated rows and keep only the first ten million.
, Nums AS (SELECT TOP (10000000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM E8)
INSERT INTO dbo.Numbers (Number)
SELECT N
FROM Nums;   -- terminated so that a following WITH statement in the same batch parses

然后,您应该可以使用类似下面的方法(我假设所有开始时间都精确到整分钟,并且如示例数据所示,每个ACC_ID、PROD_ID、START_TIME组合都没有重复项;如果不满足这些假设,则需要在参与左联接之前先在分钟级别进行预聚合)

-- Sliding 24-hour order count per (ACC_ID, PROD_ID), computed on a dense
-- one-row-per-minute grid so that a fixed ROWS frame equals a fixed time span.
-- Assumptions carried over from the answer text: START_TIME values fall on
-- whole minutes, there is at most one order per pair per minute, and
-- dbo.Numbers holds consecutive integers starting at 1. If a pair spans more
-- minutes than dbo.Numbers has rows, its later orders silently drop out.
-- The leading semicolon makes the statement safe to paste after an
-- unterminated statement in the same batch.
;WITH GroupSpan AS (
    -- First order time and total span in minutes for each account/product pair.
    SELECT
        ACC_ID,
        PROD_ID,
        FirstStart  = MIN(START_TIME),
        SpanMinutes = DATEDIFF(MINUTE, MIN(START_TIME), MAX(START_TIME))
    FROM #Data
    GROUP BY
        ACC_ID,
        PROD_ID
),
MinuteGrid AS (
    -- One row per minute from the first to the last order of each pair.
    SELECT
        gs.ACC_ID,
        gs.PROD_ID,
        SlotTime = DATEADD(MINUTE, n.Number - 1, gs.FirstStart)
    FROM GroupSpan AS gs
    INNER JOIN dbo.Numbers AS n
        ON n.Number <= gs.SpanMinutes + 1
),
Counted AS (
    -- Attach the order (if any) that falls on each minute, then count the
    -- non-NULL orders in the current minute plus the 1439 minutes before it.
    SELECT
        mg.ACC_ID,
        mg.PROD_ID,
        d.START_TIME,
        Cnt = COUNT(d.START_TIME) OVER (PARTITION BY mg.ACC_ID, mg.PROD_ID
                                        ORDER BY mg.SlotTime
                                        ROWS BETWEEN 1439 PRECEDING AND CURRENT ROW)
    FROM MinuteGrid AS mg
    LEFT JOIN #Data AS d
        ON  d.ACC_ID = mg.ACC_ID
        AND d.PROD_ID = mg.PROD_ID
        AND d.START_TIME = mg.SlotTime
)
-- Keep only the minutes on which an order actually exists.
SELECT
    ACC_ID,
    PROD_ID,
    START_TIME,
    Cnt
FROM Counted
WHERE START_TIME IS NOT NULL
ORDER BY
    ACC_ID,
    PROD_ID,
    START_TIME;

答案 1 :(得分:0)

找到this post关于如何重置运行总和后,我想我也许终于可以解决这个问题了。不确定它的伸缩性如何,但它看起来可以正常工作。

我还为订单数量添加了一个新列,因为有时跟踪在同一时间窗口内的总运行订单可能很有用。

可以在以下CASE语句中设置滑动时间窗口:
CASE WHEN RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG >= 1440 THEN 0 ELSE RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG END

-- Sample data for the recursive running-count answer.
-- Rebuilds the session-scoped temp table #Data with one row per order,
-- including the ordered quantity used for the running sum.
DROP TABLE IF EXISTS #Data;

CREATE TABLE #Data
(
    ORDER_TIME          DATETIME            -- order timestamp; every sample value falls on a whole minute
    ,ORDER_ID           NUMERIC(18,0)
    ,PROD_ID            NUMERIC(18,0)
    ,ACCOUNT_ID         NUMERIC(18,0)
    ,ORDER_QUANTITY     INT
);

-- The explicit column list keeps the load correct if the table definition is
-- ever reordered or extended.
-- Timestamps are written as ISO 8601 ('YYYY-MM-DDThh:mm:ss.mmm') because the
-- space-separated form is interpreted according to SET LANGUAGE / SET DATEFORMAT
-- when converted to DATETIME and can fail or swap day and month.
INSERT INTO #Data (ORDER_TIME, ORDER_ID, PROD_ID, ACCOUNT_ID, ORDER_QUANTITY)
VALUES
     ('2018-06-22T11:00:00.000', 1981516061, 158666, 1601554883, 5)
    ,('2018-07-09T10:15:00.000', 2008873062, 158666, 1601554883, 3)
    ,('2018-07-09T12:33:00.000', 2009269223, 158666, 1601554883, 2)
    ,('2018-07-10T08:29:00.000', 2010735394, 158666, 1601554883, 4)
    ,('2018-07-10T10:57:00.000', 2010735584, 158666, 1601554883, 7)
    ,('2018-06-27T23:53:00.000', 1991467553, 158666, 2300231016, 6)
    ,('2018-06-28T00:44:00.000', 1991583913, 158666, 2300231016, 6)
    ,('2018-07-04T04:15:00.000', 2001154492, 158666, 2300231016, 4)
    ,('2018-07-04T15:44:00.000', 2001154814, 158666, 2300231016, 5)
    ,('2018-07-04T21:30:00.000', 2002057915, 158666, 2300231016, 4)
    ,('2018-07-05T02:09:00.000', 2002058086, 158666, 2300231016, 4)
    ,('2018-07-05T04:15:00.000', 2002058147, 158666, 2300231016, 3)
    ,('2018-07-05T17:23:00.000', 2003700706, 158666, 2300231016, 2)
    ,('2018-07-05T18:07:00.000', 2003700938, 158666, 2300231016, 1)
    ,('2018-07-06T20:15:00.000', 2005719626, 158666, 2300231016, 7)
    ,('2018-07-07T07:45:00.000', 2005719879, 158666, 2300231016, 8)
    ,('2018-07-07T12:13:00.000', 2005719931, 158666, 2300231016, 9)
    ,('2018-07-09T18:29:00.000', 2009395510, 158666, 2300231016, 8)
    ,('2018-07-09T21:05:00.000', 2009395523, 158666, 2300231016, 6)
    ,('2018-07-11T21:31:00.000', 2011107312, 158666, 2300231016, 5)
    ,('2018-06-27T18:23:00.000', 1991016381, 258669, 2300231016, 4)
    ,('2018-06-27T19:07:00.000', 1991181365, 258669, 2300231016, 4)
    ,('2018-06-27T19:28:00.000', 1991181376, 258669, 2300231016, 3)
    ,('2018-06-28T01:44:00.000', 1991583923, 258669, 2300231016, 9)
    ,('2018-06-28T02:19:00.000', 1991583943, 258669, 2300231016, 2)
    ,('2018-07-03T10:15:00.000', 1999231742, 258669, 2300231016, 1)
    ,('2018-07-03T10:45:00.000', 2000293679, 258669, 2300231016, 1)
    ,('2018-07-03T14:22:00.000', 2000293804, 258669, 2300231016, 3)
    ,('2018-07-04T19:45:00.000', 2002057785, 258669, 2300231016, 2)
    ,('2018-07-04T21:00:00.000', 2002057813, 258669, 2300231016, 1)
    ,('2018-07-05T15:12:00.000', 2002548332, 258669, 2300231016, 7)
    ,('2018-07-05T17:52:00.000', 2003700719, 258669, 2300231016, 6)
    ,('2018-07-09T22:30:00.000', 2009395530, 258669, 2300231016, 5)
    ,('2018-07-09T23:23:00.000', 2009395666, 258669, 2300231016, 3)
    ,('2018-07-30T17:45:00.000', 2043642075, 158666, 2300231016, 2)
    ,('2018-07-30T23:30:00.000', 2043642114, 158666, 2300231016, 4);




-- Running order count and quantity sum per (ACCOUNT_ID, PROD_ID) that restart
-- whenever the time accumulated since the start of the current window reaches
-- 24 hours (1440 minutes).
-- Assumes at most one order per account/product per ORDER_TIME; with tied
-- timestamps the order in which tied rows are numbered is arbitrary.
;WITH NextEventLag AS(
--Numbers the orders of each account/product in time order and measures the gap to the previous one.
SELECT 
    ORDER_TIME  
    ,ORDER_ID
    ,PROD_ID    
    ,ACCOUNT_ID 
    ,RowNum                     = ROW_NUMBER() OVER(PARTITION BY ACCOUNT_ID, PROD_ID ORDER BY ORDER_TIME)

--NEXT_ORDER_TIME_LAG: minutes between this order and the previous order of the same
--account/product. LAG defaults to the row's own ORDER_TIME, so the first order gets 0.
    ,NEXT_ORDER_TIME_LAG        = DATEDIFF(MINUTE, LAG(ORDER_TIME, 1, ORDER_TIME) OVER(PARTITION BY ACCOUNT_ID, PROD_ID ORDER BY ORDER_TIME), ORDER_TIME)

    ,ORDER_QUANTITY

FROM #Data
)

,RunningOrders AS(
--Recursive walk over each account/product in RowNum order, carrying the minutes
--accumulated since the current window started.
--Anchor: the first order of every account/product starts a window at 0 minutes.
SELECT 
    RowNum
    ,ORDER_TIME
    ,ACCOUNT_ID
    ,PROD_ID
    ,NEXT_ORDER_TIME_LAG
    ,LAG_LESS_THAN_24h              = 0
    ,ORDER_QUANTITY

FROM NextEventLag

WHERE RowNum = 1


UNION ALL

--Recursive step: move to the next order of the same account/product.
SELECT 
    NextEventLag.RowNum
    ,NextEventLag.ORDER_TIME
    ,NextEventLag.ACCOUNT_ID
    ,NextEventLag.PROD_ID
    ,NextEventLag.NEXT_ORDER_TIME_LAG

--If the accumulated time plus the gap to this order reaches 1440 minutes, reset to 0 (this order starts a new window).
--Change 1440 to the desired window length in minutes.
    ,LAG_LESS_THAN_24h              = CASE WHEN RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG >= 1440 THEN 0 
                                       ELSE RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG
                                      END
    ,NextEventLag.ORDER_QUANTITY

FROM RunningOrders
    INNER JOIN NextEventLag             ON RunningOrders.RowNum + 1 = NextEventLag.RowNum
                                        AND RunningOrders.ACCOUNT_ID = NextEventLag.ACCOUNT_ID
                                        AND RunningOrders.PROD_ID = NextEventLag.PROD_ID
)

,GroupedLags AS(
--Assigns a window number to every order: the count of window starts (LAG_LESS_THAN_24h = 0)
--seen up to and including the order's ORDER_TIME. A windowed SUM replaces the earlier
--TOP 1 look-up, which had no ORDER BY (nondeterministic) and re-ran the recursion.
--The RANGE frame keeps orders that share an ORDER_TIME in the same window.
SELECT 
    RunningOrders.RowNum
    ,RunningOrders.ORDER_TIME
    ,RunningOrders.ACCOUNT_ID
    ,RunningOrders.PROD_ID
    ,RunningOrders.NEXT_ORDER_TIME_LAG
    ,RunningOrders.LAG_LESS_THAN_24h
    ,RunningOrders.ORDER_QUANTITY
    ,Groups                         = SUM(CASE WHEN RunningOrders.LAG_LESS_THAN_24h = 0 THEN 1 ELSE 0 END)
                                          OVER(PARTITION BY RunningOrders.ACCOUNT_ID, RunningOrders.PROD_ID
                                               ORDER BY RunningOrders.ORDER_TIME
                                               RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)

FROM RunningOrders
)


SELECT 
    GroupedLags.ACCOUNT_ID
    ,GroupedLags.PROD_ID
    ,GroupedLags.ORDER_TIME
    ,GroupedLags.NEXT_ORDER_TIME_LAG
    ,GroupedLags.LAG_LESS_THAN_24h
--RowNum breaks ties on ORDER_TIME so the count and the sum walk the rows in the same order;
--the ROWS frame makes the sum advance one order at a time even when timestamps tie.
    ,RUNNING_COUNT_24h                              = ROW_NUMBER() OVER(PARTITION BY GroupedLags.ACCOUNT_ID, GroupedLags.PROD_ID, GroupedLags.Groups ORDER BY GroupedLags.ORDER_TIME, GroupedLags.RowNum)
    ,RUNNING_SUM_24h                                = SUM(GroupedLags.ORDER_QUANTITY) OVER(PARTITION BY GroupedLags.ACCOUNT_ID, GroupedLags.PROD_ID, GroupedLags.Groups ORDER BY GroupedLags.ORDER_TIME, GroupedLags.RowNum ROWS UNBOUNDED PRECEDING)

FROM GroupedLags

ORDER BY 
    GroupedLags.ACCOUNT_ID
    ,GroupedLags.PROD_ID
    ,GroupedLags.ORDER_TIME

--The recursion goes one level deeper per order within an account/product, so the default
--limit of 100 levels would abort on any pair with more than 101 orders. RowNum strictly
--increases and is finite, so removing the limit cannot loop forever.
OPTION (MAXRECURSION 0);

这里是db<>fiddle demo