我试图计算24小时滑动窗口中的订单。我有一个'detetime'字段,正在计算以分钟为单位的24小时窗口汇总。每当两个连续订单之间的订单时间超过1440分钟或连续订单的运行时间超过1440分钟时,它应该重新开始计数。
环境是SQL Server 2016,我可以创建临时表,但不能创建物理表,也不能创建内存优化的对象(我想2012+上的任何东西都应该起作用)。
我在同一张表上尝试了一个内部联接,并使用了递归CTE,ROW_NUMBER等进行了测试,但是问题在于,对于24小时窗口和计算开始时间的基准时间,从没有固定的行数期间的变化。我唯一的常数是24小时的时间跨度。
尝试了以下内容:
https://www.red-gate.com/simple-talk/sql/t-sql-programming/calculating-values-within-a-rolling-window-in-transact-sql/
Calculate running total / running balance
跨应用程序似乎在大多数情况下都有效,但是在某些情况下-计算运行的24小时窗口时-并非如此。我尝试通过多种方式更改WHERE子句中的日期时间条件,但仍然不知道如何使其正常工作。
我曾想过要在https://blog.jooq.org/2015/05/12/use-this-neat-window-function-trick-to-calculate-time-differences-in-a-time-series/所示的24小时标记处创建一个重置事件,但此时我的大脑正在融化,甚至连逻辑都弄不清楚。
DROP TABLE IF EXISTS #Data
CREATE TABLE #Data
(
START_TIME DATETIME
,ORDER_ID NUMERIC(18,0)
,PROD_ID NUMERIC(18,0)
,ACC_ID NUMERIC(18,0)
);
INSERT INTO #Data
SELECT '2018-06-22 11:00:00.000', 198151606, 58666, 1601554883
UNION ALL SELECT '2018-07-09 10:15:00.000',2008873061,58666,1601554883
UNION ALL SELECT '2018-07-09 12:33:00.000',2009269222,58666,1601554883
UNION ALL SELECT '2018-07-10 08:29:00.000',2010735393,58666,1601554883
UNION ALL SELECT '2018-07-10 10:57:00.000',2010735584,58666,1601554883
UNION ALL SELECT '2018-06-27 23:53:00.000',1991467555,58666,2300231016
UNION ALL SELECT '2018-06-28 00:44:00.000',1991583916,58666,2300231016
UNION ALL SELECT '2018-07-04 04:15:00.000',2001154497,58666,2300231016
UNION ALL SELECT '2018-07-04 15:44:00.000',2001154818,58666,2300231016
UNION ALL SELECT '2018-07-04 21:30:00.000',2002057919,58666,2300231016
UNION ALL SELECT '2018-07-05 02:09:00.000',1200205808,58666,2300231016
UNION ALL SELECT '2018-07-05 04:15:00.000',2200205814,58666,2300231016
UNION ALL SELECT '2018-07-05 17:23:00.000',3200370070,58666,2300231016
UNION ALL SELECT '2018-07-05 18:07:00.000',4200370093,58666,2300231016
UNION ALL SELECT '2018-07-06 20:15:00.000',5200571962,58666,2300231016
UNION ALL SELECT '2018-07-07 07:45:00.000',6200571987,58666,2300231016
UNION ALL SELECT '2018-07-07 12:13:00.000',7200571993,58666,2300231016
UNION ALL SELECT '2018-07-09 18:29:00.000',8200939551,58666,2300231016
UNION ALL SELECT '2018-07-09 21:05:00.000',9200939552,58666,2300231016
UNION ALL SELECT '2018-07-11 21:31:00.000',2011107311,58666,2300231016
UNION ALL SELECT '2018-06-27 18:23:00.000',1991016382,58669,2300231016
UNION ALL SELECT '2018-06-27 19:07:00.000',1991181363,58669,2300231016
UNION ALL SELECT '2018-06-27 19:28:00.000',1991181374,58669,2300231016
UNION ALL SELECT '2018-06-28 01:44:00.000',1991583925,58669,2300231016
UNION ALL SELECT '2018-06-28 02:19:00.000',1991583946,58669,2300231016
UNION ALL SELECT '2018-07-03 10:15:00.000',1999231747,58669,2300231016
UNION ALL SELECT '2018-07-03 10:45:00.000',2000293678,58669,2300231016
UNION ALL SELECT '2018-07-03 14:22:00.000',200029380,58669,2300231016
UNION ALL SELECT '2018-07-04 19:45:00.000',2002057789,58669,2300231016
UNION ALL SELECT '2018-07-04 21:00:00.000',1200205781,58669,2300231016
UNION ALL SELECT '2018-07-05 15:12:00.000',2200254833,58669,2300231016
UNION ALL SELECT '2018-07-05 17:52:00.000',3200370071,58669,2300231016
UNION ALL SELECT '2018-07-09 22:30:00.000',4200939553,58669,2300231016
UNION ALL SELECT '2018-07-09 23:23:00.000',5200939566,58669,2300231016
UNION ALL SELECT '2018-07-30 17:45:00.000',6204364207,58666,2300231016
UNION ALL SELECT '2018-07-30 23:30:00.000',7204364211,58666,2300231016
;WITH TimeBetween AS(
SELECT
ACC_ID
,PROD_ID
,ORDER_ID
,START_TIME
,TIME_BETWEEN_ORDERS = COALESCE(CASE WHEN DATEDIFF(MINUTE, LAG(START_TIME) OVER(PARTITION BY ACC_ID, PROD_ID
ORDER BY START_TIME), START_TIME) >= 1440
THEN 0
ELSE DATEDIFF(MINUTE, LAG(START_TIME) OVER(PARTITION BY ACC_ID, PROD_ID
ORDER BY START_TIME), START_TIME)
END, 0)
FROM #Data
)
SELECT
TimeBetween.ACC_ID
,TimeBetween.PROD_ID
,TimeBetween.ORDER_ID
,TimeBetween.START_TIME
,TIME_BETWEEN_ORDERS
--Not working correctly, repeats the previous time at the end of the window when it should be 0.
,RUNNING_TIME_BETWEEN_ORDERS = SUM(TIME_BETWEEN_ORDERS) OVER(PARTITION BY ACC_ID, PROD_ID ORDER BY START_TIME)
,Running24h.*
FROM TimeBetween
CROSS APPLY(SELECT TOP 1
RUNNING_COUNT_24h = COUNT(*) OVER() --Count admin units within the time window in the WHERE clause
--Check what APPLY is returning for running time
,RUNNING_TIME_BETWEEN_ORDERS_Apply = DATEDIFF(MINUTE, StageBaseApply.START_TIME, TimeBetween.START_TIME)
--Check what APPLY is using as base event anchor for the calculation
,START_TIME_Apply = StageBaseApply.START_TIME
FROM #Data AS StageBaseApply
WHERE
StageBaseApply.ACC_ID = TimeBetween.ACC_ID
AND StageBaseApply.PROD_ID = TimeBetween.PROD_ID
AND (StageBaseApply.START_TIME > DATEADD(MINUTE, -1440, TimeBetween.START_TIME)
AND StageBaseApply.START_TIME <= TimeBetween.START_TIME
)
ORDER BY StageBaseApply.START_TIME
) AS Running24h
ORDER BY ACC_ID,PROD_ID, START_TIME
当订单之间的运行时间超过24小时时,运行计数应从1重新开始。 当前,它会重复最后一个值,并且用于计算的时间似乎已关闭。
答案 0 :(得分:0)
首先创建一个Numbers表,其中包含的行数至少要与您要处理的最大时间范围内的分钟数一样
CREATE TABLE dbo.Numbers(Number INT PRIMARY KEY);
WITH E1(N) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
) -- 1*10^1 or 10 rows
, E2(N) AS (SELECT 1 FROM E1 a, E1 b) -- 1*10^2 or 100 rows
, E4(N) AS (SELECT 1 FROM E2 a, E2 b) -- 1*10^4 or 10,000 rows
, E8(N) AS (SELECT 1 FROM E4 a, E4 b) -- 1*10^8 or 100,000,000 rows
, Nums AS (SELECT TOP (10000000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM E8)
INSERT INTO dbo.Numbers
SELECT N
FROM Nums
然后,您应该可以使用类似的方法(我假设所有开始时间都是精确的分钟,并且如示例数据中所示,每个ACC_ID,PROD_ID,START_TIME
都没有重复项,如果需要的话在参与左联接之前在分钟级别进行预聚集)
WITH G
AS (SELECT ACC_ID,
PROD_ID,
MIN = MIN(START_TIME),
MAX = MAX(START_TIME),
Range = DATEDIFF(MINUTE, MIN(START_TIME), MAX(START_TIME))
FROM #Data
GROUP BY ACC_ID,
PROD_ID),
E
AS (SELECT *
FROM G
JOIN dbo.Numbers N
ON N.Number <= Range + 1),
R AS (SELECT E.ACC_ID,
E.PROD_ID,
D.START_TIME,
Cnt = COUNT(D.START_TIME) OVER (PARTITION BY E.ACC_ID, E.PROD_ID
ORDER BY DATEADD(MINUTE, NUMBER-1, MIN)
ROWS BETWEEN 1439 PRECEDING AND CURRENT ROW)
FROM E
LEFT JOIN #Data D
ON D.ACC_ID = E.ACC_ID
AND D.PROD_ID = E.PROD_ID
AND D.START_TIME = DATEADD(MINUTE, NUMBER-1, MIN) )
SELECT *
FROM R
WHERE START_TIME IS NOT NULL
ORDER BY ACC_ID,
PROD_ID,
START_TIME
答案 1 :(得分:0)
找到this post关于如何重置运行总和后,我想我也许终于可以解决这个问题了。不确定它的伸缩性如何,但它是否可以正常工作。
我还为订单数量添加了一个新列,因为有时跟踪在同一时间窗口内的总运行订单可能很有用。
可以在以下CASE
语句中设置滑动时间窗口:
CASE WHEN RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG >= 1440 THEN 0 ELSE RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG
END
DROP TABLE IF EXISTS #Data
CREATE TABLE #Data
(
ORDER_TIME DATETIME
,ORDER_ID NUMERIC(18,0)
,PROD_ID NUMERIC(18,0)
,ACCOUNT_ID NUMERIC(18,0)
,ORDER_QUANTITY INT
);
INSERT INTO #Data
SELECT '2018-06-22 11:00:00.000', 1981516061, 158666, 1601554883,5
UNION ALL SELECT '2018-07-09 10:15:00.000',2008873062,158666,1601554883,3
UNION ALL SELECT '2018-07-09 12:33:00.000',2009269223,158666,1601554883,2
UNION ALL SELECT '2018-07-10 08:29:00.000',2010735394,158666,1601554883,4
UNION ALL SELECT '2018-07-10 10:57:00.000',2010735584,158666,1601554883,7
UNION ALL SELECT '2018-06-27 23:53:00.000',1991467553,158666,2300231016,6
UNION ALL SELECT '2018-06-28 00:44:00.000',1991583913,158666,2300231016,6
UNION ALL SELECT '2018-07-04 04:15:00.000',2001154492,158666,2300231016,4
UNION ALL SELECT '2018-07-04 15:44:00.000',2001154814,158666,2300231016,5
UNION ALL SELECT '2018-07-04 21:30:00.000',2002057915,158666,2300231016,4
UNION ALL SELECT '2018-07-05 02:09:00.000',2002058086,158666,2300231016,4
UNION ALL SELECT '2018-07-05 04:15:00.000',2002058147,158666,2300231016,3
UNION ALL SELECT '2018-07-05 17:23:00.000',2003700706,158666,2300231016,2
UNION ALL SELECT '2018-07-05 18:07:00.000',2003700938,158666,2300231016,1
UNION ALL SELECT '2018-07-06 20:15:00.000',2005719626,158666,2300231016,7
UNION ALL SELECT '2018-07-07 07:45:00.000',2005719879,158666,2300231016,8
UNION ALL SELECT '2018-07-07 12:13:00.000',2005719931,158666,2300231016,9
UNION ALL SELECT '2018-07-09 18:29:00.000',2009395510,158666,2300231016,8
UNION ALL SELECT '2018-07-09 21:05:00.000',2009395523,158666,2300231016,6
UNION ALL SELECT '2018-07-11 21:31:00.000',2011107312,158666,2300231016,5
UNION ALL SELECT '2018-06-27 18:23:00.000',1991016381,258669,2300231016,4
UNION ALL SELECT '2018-06-27 19:07:00.000',1991181365,258669,2300231016,4
UNION ALL SELECT '2018-06-27 19:28:00.000',1991181376,258669,2300231016,3
UNION ALL SELECT '2018-06-28 01:44:00.000',1991583923,258669,2300231016,9
UNION ALL SELECT '2018-06-28 02:19:00.000',1991583943,258669,2300231016,2
UNION ALL SELECT '2018-07-03 10:15:00.000',1999231742,258669,2300231016,1
UNION ALL SELECT '2018-07-03 10:45:00.000',2000293679,258669,2300231016,1
UNION ALL SELECT '2018-07-03 14:22:00.000',2000293804,258669,2300231016,3
UNION ALL SELECT '2018-07-04 19:45:00.000',2002057785,258669,2300231016,2
UNION ALL SELECT '2018-07-04 21:00:00.000',2002057813,258669,2300231016,1
UNION ALL SELECT '2018-07-05 15:12:00.000',2002548332,258669,2300231016,7
UNION ALL SELECT '2018-07-05 17:52:00.000',2003700719,258669,2300231016,6
UNION ALL SELECT '2018-07-09 22:30:00.000',2009395530,258669,2300231016,5
UNION ALL SELECT '2018-07-09 23:23:00.000',2009395666,258669,2300231016,3
UNION ALL SELECT '2018-07-30 17:45:00.000',2043642075,158666,2300231016,2
UNION ALL SELECT '2018-07-30 23:30:00.000',2043642114,158666,2300231016,4
;WITH NextEventLag AS(
--Returns the next event information.
SELECT
ORDER_TIME
,ORDER_ID
,PROD_ID
,ACCOUNT_ID
,RowNum = ROW_NUMBER() OVER(PARTITION BY ACCOUNT_ID, PROD_ID ORDER BY ORDER_TIME)
--NEXT_ORDER_TIME_LAG: Returns the time difference between two consecutive order times.
,NEXT_ORDER_TIME_LAG = DATEDIFF(MINUTE, LAG(ORDER_TIME, 1, ORDER_TIME) OVER(PARTITION BY ACCOUNT_ID, PROD_ID ORDER BY ORDER_TIME), ORDER_TIME)
,ORDER_QUANTITY
FROM #Data
)
,RunningOrders AS(
SELECT
RowNum
,ORDER_TIME
,ACCOUNT_ID
,PROD_ID
,NEXT_ORDER_TIME_LAG
,LAG_LESS_THAN_24h = 0
,ORDER_QUANTITY
FROM NextEventLag
WHERE RowNum = 1
UNION ALL
SELECT
NextEventLag.RowNum
,NextEventLag.ORDER_TIME
,NextEventLag.ACCOUNT_ID
,NextEventLag.PROD_ID
,NextEventLag.NEXT_ORDER_TIME_LAG
--If the time lag between consecutive events and the time running sum is over 1440 minutes then set the value to 0.
--Change the NEXT_ORDER_TIME_LAG time interval to the desired interval value in minutes.
,LAG_LESS_THAN_24h = CASE WHEN RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG >= 1440 THEN 0
ELSE RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG
END
,NextEventLag.ORDER_QUANTITY
FROM RunningOrders
INNER JOIN NextEventLag ON RunningOrders.RowNum + 1 = NextEventLag.RowNum
AND RunningOrders.ACCOUNT_ID = NextEventLag.ACCOUNT_ID
AND RunningOrders.PROD_ID = NextEventLag.PROD_ID
)
,GroupedLags AS(
--This Groups together the LAG(s) less than 1440 minutes and is used by the outer query window functions
--to calculate the running aggregates.
SELECT RunningOrders.*
,Running24h.*
FROM RunningOrders
CROSS APPLY(SELECT TOP 1
Groups = COUNT(*) OVER(ORDER BY GroupApply.LAG_LESS_THAN_24h) --Count admin units within the time window in the WHERE clause
FROM RunningOrders AS GroupApply
WHERE
GroupApply.ACCOUNT_ID = RunningOrders.ACCOUNT_ID
AND GroupApply.PROD_ID = RunningOrders.PROD_ID
AND GroupApply.ORDER_TIME <= RunningOrders.ORDER_TIME
--ORDER BY StageBaseApply.ORDER_TIME
) AS Running24h
)
select
GroupedLags.ACCOUNT_ID
,GroupedLags.PROD_ID
,GroupedLags.ORDER_TIME
,GroupedLags.NEXT_ORDER_TIME_LAG
,GroupedLags.LAG_LESS_THAN_24h
,RUNNING_COUNT_24h = ROW_NUMBER() OVER(PARTITION BY GroupedLags.ACCOUNT_ID, GroupedLags.PROD_ID, GroupedLags.Groups ORDER BY GroupedLags.ORDER_TIME)
,RUNNING_SUM_24h = SUM(ORDER_QUANTITY) OVER(PARTITION BY GroupedLags.ACCOUNT_ID, GroupedLags.PROD_ID, GroupedLags.Groups ORDER BY GroupedLags.ORDER_TIME)
from GroupedLags
ORDER BY
GroupedLags.ACCOUNT_ID
,GroupedLags.PROD_ID
,GroupedLags.ORDER_TIME