我有一个样本表
-- Sample fixture: events identified by id, each with a timestamp (dt) and a code (txt).
CREATE TABLE dbo.wt (
    id  INT          NULL,
    dt  DATETIME     NULL,
    txt NVARCHAR(50) NULL
) ON [PRIMARY]
GO
-- Seed rows: eight events for code 't1' and three for code 't2', deliberately out of time order.
INSERT INTO dbo.wt (id, dt, txt)
VALUES
    (1,  '2017-01-01 00:01:00.000', 't1'),
    (2,  '2017-01-01 00:03:00.000', 't1'),
    (3,  '2017-01-01 00:02:00.000', 't1'),
    (4,  '2017-01-01 01:04:00.000', 't1'),
    (5,  '2017-01-01 02:10:00.000', 't1'),
    (6,  '2017-01-01 00:01:00.000', 't1'),
    (7,  '2017-01-01 01:05:00.000', 't1'),
    (8,  '2017-01-01 02:10:00.000', 't2'),
    (9,  '2017-01-01 00:03:00.000', 't2'),
    (10, '2017-01-01 01:04:00.000', 't2'),
    (11, '2017-01-01 00:52:00.000', 't1')
我想得到一个按 txt 代码分组的列表,其中包含 txt 代码和 dt 日期,并且同一 txt 代码相邻两次被选中的出现之间至少间隔一小时,间隔之内的其他记录则被忽略。说明一下:t1 首次出现在 '2017-01-01 00:01:00.000',那么我要找的下一次出现应在至少一小时之后,即 '2017-01-01 01:04:00.000';我要找的第三次出现则应在 '2017-01-01 01:04:00.000' 之后至少一小时,依此类推。
经过一番搜索,我找到了类似的东西
-- Number the rows of each txt code by time, then walk them one by one, comparing
-- every row with the row immediately before it (not with the last row that was kept).
;WITH numbered AS (
    SELECT
        txt,
        dt,
        ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt ASC) AS rn
    FROM [wt]
),
chained AS (
    -- Anchor: the earliest row of every txt code.
    SELECT
        txt,
        dt,
        dt AS dt2,
        rn,
        NULL AS tm,
        0 AS recurrence
    FROM numbered
    WHERE rn = 1

    UNION ALL

    -- Step: move to the next row number within the same txt code.
    SELECT
        cur.txt,
        cur.dt,
        cur.dt,
        cur.rn,
        -- Minutes from the current row back to the previous row (zero or negative).
        DATEDIFF(MINUTE, cur.dt, prev.dt) AS tm,
        -- Keep counting while the gap to the previous row is under 60 minutes; otherwise reset to 0.
        CASE
            WHEN DATEADD(MINUTE, -60, cur.dt) < prev.dt THEN prev.recurrence + 1
            ELSE 0
        END
    FROM chained AS prev
    INNER JOIN numbered AS cur
        ON prev.rn = cur.rn - 1
       AND prev.txt = cur.txt
)
SELECT
    txt,
    dt,
    rn,
    tm,
    recurrence
FROM chained
WHERE recurrence = 0
ORDER BY txt, dt
但这并不理想,因为间隔不是从上一次被选中的出现开始计算,而是从紧邻的上一条记录开始计算,所以我得到了
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 02:10:00.000 8 -65 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
我想我找到了一种变通办法:在这种情况下,我可以先把同一小时内的记录分到一组,但我对这个解决方案并不满意。
-- Workaround: collapse the rows of each txt code into clock-hour buckets first, keeping
-- the earliest dt per bucket. The bucket key is the first 14 characters of the
-- style-120 text form of dt ('yyyy-mm-dd hh:') followed by a fixed '00:00.000' tail.
SELECT
    txt,
    MIN(dt) AS dt
INTO #ttwt
FROM [wt]
GROUP BY
    txt,
    SUBSTRING(CONVERT(varchar, dt, 120), 1, 14) + '00:00.000'

-- Then run the same previous-row comparison over the collapsed rows.
;WITH hourly AS (
    SELECT
        txt,
        dt,
        ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt ASC) AS rn
    FROM #ttwt
),
walk AS (
    -- Anchor: the earliest bucket of every txt code.
    SELECT
        txt,
        dt,
        dt AS dt2,
        rn,
        NULL AS tm,
        0 AS recurrence
    FROM hourly
    WHERE rn = 1

    UNION ALL

    -- Step: advance to the next bucket of the same txt code.
    SELECT
        nxt.txt,
        nxt.dt,
        nxt.dt,
        nxt.rn,
        -- Minutes from the current bucket row back to the previous one (zero or negative).
        DATEDIFF(MINUTE, nxt.dt, prv.dt) AS tm,
        -- Reset to 0 once the gap to the previous bucket row reaches 60 minutes.
        CASE
            WHEN DATEADD(MINUTE, -60, nxt.dt) < prv.dt THEN prv.recurrence + 1
            ELSE 0
        END
    FROM walk AS prv
    INNER JOIN hourly AS nxt
        ON prv.rn = nxt.rn - 1
       AND prv.txt = nxt.txt
)
SELECT
    txt,
    dt,
    rn,
    tm,
    recurrence
FROM walk
WHERE recurrence = 0
ORDER BY txt, dt

-- Clean up the intermediate table.
DROP TABLE #ttwt
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 01:04:00.000 2 -63 0
t1 2017-01-01 02:10:00.000 3 -66 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
如果有人能建议如何改进这个脚本,使间隔可以是任意以分钟为单位的输入值,我将不胜感激。
答案 0 :(得分:1)
如果我理解正确,我认为以下内容符合你的需要。
-- For every txt code, return its first row and then, repeatedly, the earliest later
-- row whose dt is at least @MinGapMinutes after the previously returned row.
DECLARE @MinGapMinutes INT = 60;  -- minimum gap between two returned rows, in minutes

-- Allow the script to be re-run in the same session.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

-- txt is NVARCHAR(50) to match wt.txt; a narrower VARCHAR could truncate or fail on insert.
CREATE TABLE #T (id INT, rn INT, txt NVARCHAR(50), dt DATETIME, lagDiff INT, runningDiff INT);

-- rn          : zero-based position of the row within its txt code, ordered by dt then id
-- lagDiff     : minute boundaries crossed since the previous row of the same code (0 for the first)
-- runningDiff : minute boundaries crossed since the first row of the same code
-- lagDiff and runningDiff are informational output only; selection below compares dt values directly.
INSERT INTO #T (id, rn, txt, dt, lagDiff, runningDiff)
SELECT id
     , ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id) - 1 AS rn
     , txt
     , dt
     , DATEDIFF(MINUTE, COALESCE(LAG(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt), dt) AS lagDiff
     , DATEDIFF(MINUTE, FIRST_VALUE(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt) AS runningDiff
FROM wt
-- A row without a timestamp cannot take part in an interval check; left in, it would
-- sort first and become the (useless) starting row of its code.
WHERE dt IS NOT NULL;

;WITH picked AS (
    -- Anchor: the first row of every txt code.
    SELECT id, rn, txt, dt, lagDiff, runningDiff
    FROM #T
    WHERE rn = 0

    UNION ALL

    -- Step: from the row picked last, carry forward only the earliest qualifying
    -- follower. Joining every qualifying follower instead would enumerate every
    -- possible chain of rows, which grows explosively with the number of rows.
    SELECT nxt.id, nxt.rn, nxt.txt, nxt.dt, nxt.lagDiff, nxt.runningDiff
    FROM (
        SELECT T.id, T.rn, T.txt, T.dt, T.lagDiff, T.runningDiff
             , ROW_NUMBER() OVER (PARTITION BY T.txt ORDER BY T.rn) AS pick
        FROM #T AS T
        INNER JOIN picked AS P
            ON P.txt = T.txt
           AND T.rn > P.rn
           -- "At least" the gap: inclusive comparison on the actual timestamps, so a
           -- gap of exactly @MinGapMinutes qualifies and partial minutes are not rounded.
           AND T.dt >= DATEADD(MINUTE, @MinGapMinutes, P.dt)
    ) AS nxt
    WHERE nxt.pick = 1
)
SELECT id, rn, txt, dt, lagDiff, runningDiff
FROM picked
ORDER BY txt, rn
-- One recursion level per returned row; rn strictly increases so the recursion always
-- terminates, and the default limit of 100 levels would otherwise abort long series.
OPTION (MAXRECURSION 0);
输出
+----+----+-----+-------------------------+---------+-------------+
| id | rn | txt | dt | lagDiff | runningDiff |
+----+----+-----+-------------------------+---------+-------------+
| 1 | 0 | t1 | 2017-01-01 00:01:00.000 | 0 | 0 |
| 4 | 5 | t1 | 2017-01-01 01:04:00.000 | 12 | 63 |
| 5 | 7 | t1 | 2017-01-01 02:10:00.000 | 65 | 129 |
| 9 | 0 | t2 | 2017-01-01 00:03:00.000 | 0 | 0 |
| 10 | 1 | t2 | 2017-01-01 01:04:00.000 | 61 | 61 |
| 8 | 2 | t2 | 2017-01-01 02:10:00.000 | 66 | 127 |
+----+----+-----+-------------------------+---------+-------------+
答案 1 :(得分:0)
我比较喜欢一种类似“冒泡”的逐行处理方法。我发现递归方案的问题在于:它们适用于小型数据集(比如少于 5 千或 1 万行),但数据量变大后性能会很差。出于这个原因,我更喜欢游标式的方法,它实际上是在对每一行问:“你是否满足条件?是或否。”然后插入或忽略、删除、继续处理下一行。通过这种方式,每一行只需评估一次,而不必对递归产生的每一种组合都进行评估。
-- Row-by-row pass over wt: for every txt code keep the first row, then keep each later
-- row whose dt is at least @IntervalMinutes after the last row that was kept.
DECLARE @IntervalMinutes INT = 60;  -- minimum gap between two kept rows, in minutes

-- Work queue. seq is a unique processing order (txt, then time within txt); wt.id is
-- nullable and not guaranteed unique, so it cannot safely identify the row to remove.
DECLARE @Temp TABLE
(
    seq INT NOT NULL PRIMARY KEY
  , id INT
  , dt DATETIME
  , txt NVARCHAR(50)   -- same type as wt.txt so values are never truncated
  , rwn INT            -- 1-based position of the row within its txt code
)

-- Result set: the kept rows. Dif is the number of minutes since the previously kept
-- row of the same txt code (NULL for the first row of a code).
DECLARE @Holder TABLE
(
    id INT
  , dt DATETIME
  , txt NVARCHAR(50)
  , Dif INT
)

INSERT INTO @Temp (seq, id, dt, txt, rwn)
SELECT ROW_NUMBER() OVER (ORDER BY src.txt, src.rwn)
     , src.id
     , src.dt
     , src.txt
     , src.rwn
FROM (
    SELECT id
         , dt
         , txt
         , ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id) AS rwn
    FROM wt
    -- A row without a timestamp cannot take part in an interval check.
    WHERE dt IS NOT NULL
) AS src

DECLARE
    @CurSeq INT
  , @CurId INT
  , @CurDt DATETIME
  , @Curtxt NVARCHAR(50)
  , @CurRwn INT
  , @LastDate DATETIME   -- dt of the row most recently kept for the current txt code
;

WHILE EXISTS (SELECT 1 FROM @Temp)
BEGIN
    -- Take the next row in processing order.
    SELECT TOP (1)
           @CurSeq = seq
         , @CurId = id
         , @CurDt = dt
         , @Curtxt = txt
         , @CurRwn = rwn
    FROM @Temp
    ORDER BY seq

    -- Keep the row when it opens a new txt code, or when it lies at least the
    -- interval after the last kept row. Comparing real timestamps avoids the
    -- boundary counting of DATEDIFF(HOUR, ...), which reports 1 for 00:59 -> 01:00.
    IF @CurRwn = 1 OR @CurDt >= DATEADD(MINUTE, @IntervalMinutes, @LastDate)
    BEGIN
        INSERT INTO @Holder (id, dt, txt, Dif)
        VALUES (
            @CurId
          , @CurDt
          , @Curtxt
          , CASE WHEN @CurRwn = 1 THEN NULL ELSE DATEDIFF(MINUTE, @LastDate, @CurDt) END
        )

        -- Remember the kept row in a variable, so the next comparison is always made
        -- against the last kept row of this txt code.
        SET @LastDate = @CurDt
    END

    -- Remove the processed row and loop again.
    DELETE FROM @Temp WHERE seq = @CurSeq
END

SELECT id, dt, txt, Dif
FROM @Holder
ORDER BY txt, dt, id