我正在尝试根据服务记录来计算火车的停机时间,以下是示例场景
一列火车可能同时运行多个作业,有时可能会重叠
针对:
因此,实际的停机时间应为360分钟(作业1)+ 0分钟(作业2)+ 10分钟(作业3)= 370分钟
我想要的输出是:-
截至目前,我有20列火车,我需要如上所述计算停机时间
我该怎么做?
样本数据脚本:
-- Sample data for the question: one row per service job carried out on a train.
-- Column types and nullability are kept exactly as posted so that queries
-- written against the original table definition keep working.
CREATE TABLE [dbo].[tb_ServiceMemo](
    [Job_Number] [nvarchar](500) NULL,
    [Train_Number] [nvarchar](500) NULL,
    [Work_Start_Date] [datetime] NULL,
    [Work_Completed_Date] [datetime] NULL
) ON [PRIMARY];

-- Explicit column list: the insert no longer depends on the physical column order.
-- Job and train numbers are NVARCHAR columns, so they are supplied as N'...' strings
-- instead of relying on an implicit INT -> NVARCHAR conversion.
-- Dates use the ISO 8601 form yyyy-MM-ddThh:mm:ss, which SQL Server parses the same
-- way under every LANGUAGE / DATEFORMAT setting. The originally posted
-- 'dd-MM-yyyy' / 'dd-MM-yy' strings are read here day-first, i.e. as 1 August 2018.
INSERT INTO [dbo].[tb_ServiceMemo]
    ([Job_Number], [Train_Number], [Work_Start_Date], [Work_Completed_Date])
VALUES
    (N'1', N'1', '2018-08-01T12:35:00', '2018-08-01T18:35:00'),
    (N'2', N'1', '2018-08-01T14:20:00', '2018-08-01T15:20:00'),
    (N'3', N'1', '2018-08-01T18:00:00', '2018-08-01T18:45:00');
答案 0 :(得分:1)
这是一个典型的“间隙与孤岛”(gaps and islands)问题。
您可以尝试使用递归CTE,把每条作业记录按分钟展开成多行。
然后对每个连续分组取 MAX 和 MIN 日期时间来计算结果。
-- Total downtime per train, in minutes, counting overlapping jobs only once.
-- Approach: expand every job into one row per minute, collapse duplicate minutes,
-- group consecutive minutes into "islands", then add up the island lengths.
;WITH CTE AS (
    -- Anchor: one row per job, positioned at the job's start time.
    SELECT [Train_Number], [Work_Start_Date], [Work_Completed_Date]
    FROM [tb_ServiceMemo]
    UNION ALL
    -- Recursive step: advance one minute at a time until the job's end is reached.
    SELECT [Train_Number], DATEADD(minute, 1, [Work_Start_Date]), [Work_Completed_Date]
    FROM CTE
    WHERE DATEADD(minute, 1, [Work_Start_Date]) <= [Work_Completed_Date]
), CTE2 AS (
    -- DISTINCT removes minutes that are covered by more than one job.
    -- MAX_Time is the latest completion time of the train; it only serves as a
    -- fixed reference point for the minute offset computed in CTE_RESULT.
    SELECT DISTINCT
        Train_Number,
        Work_Start_Date,
        MAX(Work_Completed_Date) OVER (PARTITION BY Train_Number) AS MAX_Time
    FROM CTE
), CTE_RESULT AS (
    -- Within a run of consecutive minutes both the minute offset and the row
    -- number grow by 1 per row, so their difference is constant for the run and
    -- changes whenever there is a gap. That difference identifies the island.
    SELECT
        Train_Number,
        Work_Start_Date,
        DATEDIFF(mi, MAX_Time, Work_Start_Date)
            - ROW_NUMBER() OVER (PARTITION BY Train_Number ORDER BY Work_Start_Date) AS grp
    FROM CTE2
)
SELECT
    t1.Train_Number,
    SUM(t1.time_diff) AS Total_Downtime_Minutes  -- named so clients can reference the column
FROM (
    -- Length of each island = last covered minute minus first covered minute.
    SELECT
        Train_Number,
        DATEDIFF(MI, MIN(Work_Start_Date), MAX(Work_Start_Date)) AS time_diff
    FROM CTE_RESULT
    GROUP BY Train_Number, grp
) AS t1
GROUP BY t1.Train_Number
-- One recursion level is used per minute of a job, so the default recursion limit
-- is lifted; 0 means "no limit".
OPTION (MAXRECURSION 0);
答案 1 :(得分:1)
这是著名的日期“间隙与孤岛”(gaps and islands)问题。以下是使用递归CTE的解决方案。如果您不熟悉递归CTE,可能会觉得难以理解。我为所有可能需要说明的部分都添加了注释。
我还添加了一些示例以覆盖不同的情况,例如跨越多天的时间段,以及一个作业的结束时间恰好等于另一个作业开始时间的情况。
示例设置:
-- Test fixture: a session-local copy of the service memo table with four trains
-- that cover nested jobs, separate jobs, touching jobs and multi-day jobs.
-- Dropped first so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#tb_ServiceMemo') IS NOT NULL
    DROP TABLE #tb_ServiceMemo;

CREATE TABLE #tb_ServiceMemo(
    Job_Number INT,              -- numeric identifier, so INT rather than VARCHAR
    Train_Number INT,            -- numeric identifier, so INT rather than VARCHAR
    Work_Start_Date DATETIME,
    Work_Completed_Date DATETIME);

-- Date literals use the ISO 8601 form yyyy-MM-ddThh:mm:ss. For the DATETIME type
-- the space-separated form 'yyyy-MM-dd hh:mm' is still interpreted according to
-- the session's LANGUAGE / DATEFORMAT; the 'T' form is not.
INSERT INTO #tb_ServiceMemo (
    Job_Number,
    Train_Number,
    Work_Start_Date,
    Work_Completed_Date)
VALUES
    -- Total time train 1: 6h 10m (370m) -- job 2 is nested in job 1, job 3 overlaps its end
    (1, 1, '2018-08-01T12:35:00', '2018-08-01T18:35:00'),
    (2, 1, '2018-08-01T14:20:00', '2018-08-01T15:20:00'),
    (3, 1, '2018-08-01T18:00:00', '2018-08-01T18:45:00'),
    -- Total time train 2: 2h (120m) -- two separate jobs, then two jobs that touch at 13:45
    (4, 2, '2018-08-01T12:00:00', '2018-08-01T12:10:00'),
    (5, 2, '2018-08-01T12:15:00', '2018-08-01T12:20:00'),
    (6, 2, '2018-08-01T13:15:00', '2018-08-01T13:45:00'),
    (9, 2, '2018-08-01T13:45:00', '2018-08-01T15:00:00'),
    -- Total time train 3: 3h 45m (225m) -- overlap across midnight
    (7, 3, '2018-08-01T23:30:00', '2018-08-02T00:30:00'),
    (8, 3, '2018-08-02T00:15:00', '2018-08-02T03:15:00'),
    -- Total time train 4: 2d 8h 15m (3375m) -- multi-day overlapping jobs
    (10, 4, '2018-08-01T23:00:00', '2018-08-03T23:00:00'),
    (11, 4, '2018-08-02T00:15:00', '2018-08-04T07:15:00');
解决方案:
-- Computes the total downtime in minutes per train from #tb_ServiceMemo.
-- Overlapping jobs of the same train are chained together with a recursive CTE so
-- that each chain yields one merged interval; the merged intervals are then summed.
-- NOTE(review): no OPTION (MAXRECURSION n) hint is given on this statement, so the
-- server's default recursion limit applies to the chain length.
;WITH TimeLapses AS
(
-- Recursive Anchor: jobs that do not overlap with any job of the same train that has a lower Job_Number.
-- Each of these jobs becomes the starting point (InitialJobNumber) of one chain.
SELECT
InitialJobNumber = T.Job_Number,
JobNumber = T.Job_Number,
TrainNumber = T.Train_Number,
IntervalStart = T.Work_Start_Date,
IntervalEnd = T.Work_Completed_Date,
JobExtensionPath = CONVERT(VARCHAR(MAX), T.Job_Number), -- Will store the chained jobs together for clarity
RecursionLevel = 1
FROM
#tb_ServiceMemo AS T
WHERE
NOT EXISTS (
SELECT
'Job doesn''t overlap with previous Jobs (by train)'
FROM
#tb_ServiceMemo AS S
WHERE
S.Train_Number = T.Train_Number AND
S.Job_Number < T.Job_Number AND
-- Overlap test is inclusive on both ends, so a job that starts exactly when another ends counts as overlapping
S.Work_Completed_Date >= T.Work_Start_Date AND -- Conditions for the periods to overlap
S.Work_Start_Date <= T.Work_Completed_Date)
UNION ALL
-- Recursive Union: Chain overlapping Jobs by train and keep intervals boundaries (min & max)
-- Each step appends one job with a higher Job_Number that overlaps the interval accumulated so far.
SELECT
InitialJobNumber = L.InitialJobNumber,
JobNumber = T.Job_Number,
TrainNumber = L.TrainNumber,
IntervalStart = CASE -- Minimum of both starts
WHEN L.IntervalStart <= T.Work_Start_Date THEN L.IntervalStart
ELSE T.Work_Start_Date END,
IntervalEnd = CASE -- Maximum of both ends
WHEN L.IntervalEnd >= T.Work_Completed_Date THEN L.IntervalEnd
ELSE T.Work_Completed_Date END,
JobExtensionPath = L.JobExtensionPath + '->' + CONVERT(VARCHAR(MAX), T.Job_Number),
RecursionLevel = L.RecursionLevel + 1
FROM
TimeLapses AS L -- Recursive CTE!
INNER JOIN #tb_ServiceMemo AS T ON
L.TrainNumber = T.Train_Number AND
T.Work_Completed_Date >= L.IntervalStart AND -- Conditions for the periods to overlap
T.Work_Start_Date <= L.IntervalEnd
WHERE
L.JobNumber < T.Job_Number -- Prevent joining in both directions (that would be "<>") to avoid infinite loops
),
-- Deepest recursion level reached by each chain, identified by (TrainNumber, InitialJobNumber).
MaxRecursionLevelByTrain AS
(
/*
Max recursion level will hold the longest interval for each train, as there might be recursive paths that skip some jobs. For example: Train 1's job 1 will
join with Job 2 and Job 3 on the first recursive level, then Job 2 will join with Job 3 on the next recursion. The higher the recursion level the more Jobs we
are taking into account for the longest interval.
We also need to group by InitialJobNumber as there might be different, independent gaps for each train.
*/
SELECT
TrainNumber = T.TrainNumber,
InitialJobNumber = T.InitialJobNumber,
MaxRecursionLevel = MAX(T.RecursionLevel)
FROM
TimeLapses AS T
GROUP BY
T.TrainNumber,
T.InitialJobNumber
),
-- Keeps, for each chain, the TimeLapses row(s) at the deepest recursion level and computes their length in minutes.
-- NOTE(review): the join matches on recursion level only, so if two different paths of the same chain reach the
-- same maximum level, both rows are returned and the final SUM counts that interval twice - confirm against real data.
-- NOTE(review): a job that overlaps jobs from two separate anchors is appended to both chains, which would make
-- the two merged intervals overlap each other - verify whether such data can occur.
ExpandedLapses AS
(
SELECT
TrainNumber = T.TrainNumber,
InitialJobNumber = M.InitialJobNumber,
IntervalStart = T.IntervalStart,
IntervalEnd = T.IntervalEnd,
DownTime = DATEDIFF(MINUTE, T.IntervalStart, T.IntervalEnd),
JobExtensionPath = T.JobExtensionPath,
RecursionLevel = T.RecursionLevel
FROM
MaxRecursionLevelByTrain AS M
INNER JOIN TimeLapses AS T ON
M.TrainNumber = T.TrainNumber AND
M.MaxRecursionLevel = T.RecursionLevel AND
M.InitialJobNumber = T.InitialJobNumber
)
-- Final result: one row per train with the sum of its merged interval lengths (minutes).
SELECT
TrainNumber = E.TrainNumber,
TotalDownTime = SUM(DownTime)
FROM
ExpandedLapses AS E
GROUP BY
E.TrainNumber
这些是每个CTE的部分结果,因此您可以看到每个步骤:
TimeLapses:
InitialJobNumber JobNumber TrainNumber IntervalStart IntervalEnd JobExtensionPath RecursionLevel
1 1 1 2018-08-01 12:35:00.000 2018-08-01 18:35:00.000 1 1
1 2 1 2018-08-01 12:35:00.000 2018-08-01 18:35:00.000 1->2 2
1 3 1 2018-08-01 12:35:00.000 2018-08-01 18:45:00.000 1->3 2
1 3 1 2018-08-01 12:35:00.000 2018-08-01 18:45:00.000 1->2->3 3
4 4 2 2018-08-01 12:00:00.000 2018-08-01 12:10:00.000 4 1
5 5 2 2018-08-01 12:15:00.000 2018-08-01 12:20:00.000 5 1
6 6 2 2018-08-01 13:15:00.000 2018-08-01 13:45:00.000 6 1
6 9 2 2018-08-01 13:15:00.000 2018-08-01 15:00:00.000 6->9 2
7 8 3 2018-08-01 23:30:00.000 2018-08-02 03:15:00.000 7->8 2
7 7 3 2018-08-01 23:30:00.000 2018-08-02 00:30:00.000 7 1
10 10 4 2018-08-01 23:00:00.000 2018-08-03 23:00:00.000 10 1
10 11 4 2018-08-01 23:00:00.000 2018-08-04 07:15:00.000 10->11 2
MaxRecursionLevelByTrain :
TrainNumber InitialJobNumber MaxRecursionLevel
1 1 3
2 4 1
2 5 1
2 6 2
3 7 2
4 10 2
ExpandedLapses:
TrainNumber InitialJobNumber IntervalStart IntervalEnd DownTime JobExtensionPath RecursionLevel
1 1 2018-08-01 12:35:00.000 2018-08-01 18:45:00.000 370 1->2->3 3
2 4 2018-08-01 12:00:00.000 2018-08-01 12:10:00.000 10 4 1
2 5 2018-08-01 12:15:00.000 2018-08-01 12:20:00.000 5 5 1
2 6 2018-08-01 13:15:00.000 2018-08-01 15:00:00.000 105 6->9 2
3 7 2018-08-01 23:30:00.000 2018-08-02 03:15:00.000 225 7->8 2
4 10 2018-08-01 23:00:00.000 2018-08-04 07:15:00.000 3375 10->11 2
最终结果:
TrainNumber TotalDownTime
1 370
2 120
3 225
4 3375
几件事值得一提:
- 在 #tb_ServiceMemo (Train_Number, Job_Number, Work_Start_Date) 上创建索引可以加快查询速度。
- 您可能需要在 SELECT 语句的末尾添加 OPTION (MAXRECURSION N),其中 N 是允许的最大递归层数。默认值为 100,因此对于某一列火车,如果有超过 100 个时间段链接在一起,就会弹出错误消息。可以将 N 设为 0,表示不限制递归层数。
答案 2 :(得分:1)
这是一个“孤岛”问题,但是棘手,因为它有开始时间和结束时间。
该解决方案的思路是确定每次停机从何时开始。停机的开始有什么特征?它始于一个与此前任何作业都不重叠的时刻。棘手之处在于,可能有多个作业在同一时刻开始(尽管您的数据中没有这种情况)。
一旦知道中断发生的时间,就可以使用累积总和为每个记录分配一个组,然后简单地按该组(和其他信息)进行汇总。
以下查询应执行您想要的操作:
-- Returns one row per outage (a maximal run of overlapping jobs) for each train,
-- with its start, its end and its length in minutes.
with starts as (
    -- isstart = 1 when the job opens a new outage, i.e. no job of the same train
    -- that started strictly earlier is still running at this job's start time.
    select sm.Train_Number,
           sm.Work_Start_Date,
           sm.Work_Completed_Date,
           (case when exists (select 1
                              from tb_ServiceMemo sm2
                              where sm2.Train_Number = sm.Train_Number and
                                    sm2.Work_Start_Date < sm.Work_Start_Date and
                                    sm2.Work_Completed_Date >= sm.Work_Start_Date
                             )
                 then 0 else 1
            end) as isstart
    from tb_ServiceMemo sm
)
select s.Train_Number,
       min(s.Work_Start_Date) as outage_start_date,
       max(s.Work_Completed_Date) as outage_end_date,
       -- named so the duration can be referenced by clients and outer queries
       datediff(minute, min(s.Work_Start_Date), max(s.Work_Completed_Date)) as outage_minutes
from (select st.Train_Number,
             st.Work_Start_Date,
             st.Work_Completed_Date,
             -- Running count of outage starts = outage number within the train.
             -- The RANGE frame (the default when only ORDER BY is given) is spelled
             -- out on purpose: jobs that start at the same instant are peers and
             -- receive the same running total, so they land in the same outage.
             -- A ROWS frame would split them into different groups.
             sum(st.isstart) over (partition by st.Train_Number
                                   order by st.Work_Start_Date
                                   range between unbounded preceding and current row) as grp
      from starts st
     ) s
group by s.Train_Number, s.grp;
在此db<>fiddle中,我添加了几行以显示代码在不同情况下的工作方式。
答案 3 :(得分:0)
您可以试试下面的写法吗?为了确认,我额外添加了一些测试用例,我认为结果是正确的。我也觉得应该还有更简单的方法。
-- Extra test rows for train 1: a job before the original three, plus a separate
-- later job. If the table holds only these rows, the merged coverage is
-- 09:35-18:45 and 19:00-19:45, i.e. 550 + 45 = 595 minutes.
-- The column list makes the insert independent of the table's column order.
-- UNION ALL is used because the rows are distinct literals; there is nothing to
-- de-duplicate.
-- CONVERT style 120 fixes the literal format to 'yyyy-mm-dd hh:mi:ss'.
INSERT INTO [dbo].[tb_ServiceMemo]
    ([Job_Number], [Train_Number], [Work_Start_Date], [Work_Completed_Date])
SELECT 1, 1, CONVERT(DATETIME, '2018-08-01 09:35:00', 120), CONVERT(DATETIME, '2018-08-01 12:45:00', 120) UNION ALL
SELECT 2, 1, CONVERT(DATETIME, '2018-08-01 12:35:00', 120), CONVERT(DATETIME, '2018-08-01 18:35:00', 120) UNION ALL
SELECT 3, 1, CONVERT(DATETIME, '2018-08-01 14:20:00', 120), CONVERT(DATETIME, '2018-08-01 15:20:00', 120) UNION ALL
SELECT 4, 1, CONVERT(DATETIME, '2018-08-01 18:00:00', 120), CONVERT(DATETIME, '2018-08-01 18:45:00', 120) UNION ALL
SELECT 5, 1, CONVERT(DATETIME, '2018-08-01 19:00:00', 120), CONVERT(DATETIME, '2018-08-01 19:45:00', 120);
-- Total downtime (minutes) per train, counting overlapping jobs once.
-- Approach: drop jobs that lie strictly inside another job of the same train, then
-- trim the start of each remaining job to the latest end of any other job that is
-- still running when it starts, and sum the trimmed durations.
-- NOTE(review): two jobs of one train that share the same start time (or are exact
-- duplicates) each see the other as "running at my start" and are not removed by
-- the strict containment filter, so their trimmed durations can be zero or
-- negative - confirm whether such rows can occur in the data.
SELECT
    T.[Train_Number],
    SUM(DATEDIFF(MINUTE, T.[Work_Start_Date], T.[Work_Completed_Date])) AS Delay
FROM (
    SELECT
        T1.[Train_Number],
        -- Effective start: the latest completion time among the other jobs of the
        -- same train that cover this job's start; the job's own start when no
        -- such job exists (the subquery then yields NULL).
        COALESCE(
            (
                SELECT MAX(T3.[Work_Completed_Date])
                FROM [tb_ServiceMemo] AS T3
                WHERE T3.[Train_Number] = T1.[Train_Number]
                  AND T3.[Job_Number] <> T1.[Job_Number]
                  AND T1.[Work_Start_Date] BETWEEN T3.[Work_Start_Date] AND T3.[Work_Completed_Date]
            ),
            T1.[Work_Start_Date]
        ) AS [Work_Start_Date],
        T1.[Work_Completed_Date]
    FROM [tb_ServiceMemo] AS T1
    WHERE NOT EXISTS (
        -- Ignore jobs that are strictly contained in another job.
        -- The Train_Number comparison is the fix: without it a job was discarded
        -- whenever a job of ANY train happened to span it.
        SELECT 1
        FROM [tb_ServiceMemo] AS T2
        WHERE T2.[Train_Number] = T1.[Train_Number]
          AND T2.[Work_Start_Date] < T1.[Work_Start_Date]
          AND T2.[Work_Completed_Date] > T1.[Work_Completed_Date]
    )
) AS T
GROUP BY T.[Train_Number];
想法是: