查询:选出相同事件代码中彼此间隔至少一小时的记录

时间:2017-05-04 12:45:45

标签: sql sql-server tsql

我有一个样本表

-- Sample table used by every query below: an id, a timestamp (dt) and an
-- event code (txt). All three columns are nullable and no key is declared.
CREATE TABLE dbo.wt
(
    id  int          NULL,
    dt  datetime     NULL,
    txt nvarchar(50) NULL
) ON [PRIMARY];

GO

-- Load the 11 sample rows. Note that id order does not follow dt order,
-- and that ids 1 and 6 share the same txt and dt.
INSERT INTO dbo.wt (id, dt, txt)
VALUES
    (1,  '2017-01-01 00:01:00.000', 't1'),
    (2,  '2017-01-01 00:03:00.000', 't1'),
    (3,  '2017-01-01 00:02:00.000', 't1'),
    (4,  '2017-01-01 01:04:00.000', 't1'),
    (5,  '2017-01-01 02:10:00.000', 't1'),
    (6,  '2017-01-01 00:01:00.000', 't1'),
    (7,  '2017-01-01 01:05:00.000', 't1'),
    (8,  '2017-01-01 02:10:00.000', 't2'),
    (9,  '2017-01-01 00:03:00.000', 't2'),
    (10, '2017-01-01 01:04:00.000', 't2'),
    (11, '2017-01-01 00:52:00.000', 't1');

我想得到一个按 txt 代码分组的列表,包含 txt 代码及其 dt 日期,其中同一 txt 代码相邻两次入选的发生时间至少相隔一小时,介于两者之间的记录则全部忽略。澄清一下:t1 首次出现在 '2017-01-01 00:01:00.000',那么我要找的下一次出现应当在至少一小时之后,也就是 '2017-01-01 01:04:00.000';我要找的第三次出现则是在 '2017-01-01 01:04:00.000' 之后至少一小时,依此类推。

经过一番搜索,我找到了类似的东西

-- Walks every txt group in dt order, one row per recursion step, comparing
-- each row ONLY with the row immediately before it. "recurrence" restarts at 0
-- whenever the current row is at least 60 minutes after that previous row.
;with ordered as (
    -- number the rows of each txt group chronologically
    select
        txt,
        dt,
        row_number() over (partition by txt order by dt asc) as rn
    from [wt]
),
chain as (
    -- anchor: earliest row of each txt group
    select
        txt,
        dt,
        dt as dt2,
        rn,
        null as tm,
        0 as recurrence
    from ordered
    where rn = 1

    union all

    -- step: move on to the next row (rn + 1) of the same txt group
    select
        nxt.txt,
        nxt.dt,
        nxt.dt,
        nxt.rn,
        -- minutes from the current row back to the previous row (never positive)
        datediff(minute, nxt.dt, prev.dt) as tm,
        case
            when prev.dt > dateadd(minute, -60, nxt.dt) then prev.recurrence + 1
            else 0
        end
    from chain as prev
    inner join ordered as nxt
        on  nxt.rn  = prev.rn + 1
        and nxt.txt = prev.txt
)
select
    txt,
    dt,
    rn,
    tm,
    recurrence
from chain
where recurrence = 0
order by txt, dt

但这并不理想,因为间隔不是从该组第一次出现(即上一条入选记录)的时间起算,而是从紧邻的上一条记录起算,所以我得到了

txt dt  rn  tm  recurrence
t1  2017-01-01 00:01:00.000 1   NULL    0
t1  2017-01-01 02:10:00.000 8   -65 0
t2  2017-01-01 00:03:00.000 1   NULL    0
t2  2017-01-01 01:04:00.000 2   -61 0
t2  2017-01-01 02:10:00.000 3   -66 0

我想我找到了一种变通办法:在这个例子里,我可以先把同一小时内的记录归为一组,但我对这个解决方案并不满意。

-- Step 1: keep only the earliest dt per txt and clock hour. The grouping key
-- is the 'yyyy-mm-dd hh:' prefix of the style-120 string plus '00:00.000'.
select
    txt,
    min(dt) as dt
into #ttwt
from [wt]
group by
    txt,
    substring(convert(varchar,dt,120),1,14)+'00:00.000'

-- Step 2: same previous-row walk as before, now over the reduced set in #ttwt.
;with ordered as (
    -- number the remaining rows of each txt group chronologically
    select
        txt,
        dt,
        row_number() over (partition by txt order by dt asc) as rn
    from #ttwt
),
chain as (
    -- anchor: earliest row of each txt group
    select
        txt,
        dt,
        dt as dt2,
        rn,
        null as tm,
        0 as recurrence
    from ordered
    where rn = 1

    union all

    -- step: move on to the next row (rn + 1) of the same txt group
    select
        nxt.txt,
        nxt.dt,
        nxt.dt,
        nxt.rn,
        -- minutes from the current row back to the previous row (never positive)
        datediff(minute, nxt.dt, prev.dt) as tm,
        case
            when prev.dt > dateadd(minute, -60, nxt.dt) then prev.recurrence + 1
            else 0
        end
    from chain as prev
    inner join ordered as nxt
        on  nxt.rn  = prev.rn + 1
        and nxt.txt = prev.txt
)
select
    txt,
    dt,
    rn,
    tm,
    recurrence
from chain
where recurrence = 0
order by txt, dt

-- Step 3: remove the scratch table
drop table #ttwt

txt dt  rn  tm  recurrence
t1  2017-01-01 00:01:00.000 1   NULL    0
t1  2017-01-01 01:04:00.000 2   -63 0
t1  2017-01-01 02:10:00.000 3   -66 0
t2  2017-01-01 00:03:00.000 1   NULL    0
t2  2017-01-01 01:04:00.000 2   -61 0
t2  2017-01-01 02:10:00.000 3   -66 0

如果有人能就如何改进这个脚本提出建议,使间隔可以是以分钟为单位的任意输入值,我将不胜感激。

2 个答案:

答案 0 :(得分:1)

如果我理解正确,我认为以下内容符合你的需要。

-- Returns, per txt, the first event and then every next event that lies at
-- least @IntervalMinutes after the previously returned one.

-- Minimum gap, in minutes, between two returned events of the same txt.
DECLARE @IntervalMinutes INT = 60;

-- Allow the script to be re-run in the same session.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

-- txt is NVARCHAR(50) to match wt.txt; a narrower type could fail with a
-- truncation error or lose Unicode characters.
CREATE TABLE #T (id INT, rn INT, txt NVARCHAR(50), dt DATETIME, lagDiff INT, runningDiff INT);

-- rn          : 0-based chronological position inside the txt group
-- lagDiff     : DATEDIFF minutes since the previous row of the group (0 for the first row)
-- runningDiff : DATEDIFF minutes since the first row of the group
-- Both diffs are informational output only; the gap test below uses dt itself.
INSERT INTO #T (id, rn, txt, dt, lagDiff, runningDiff)
SELECT  id
      , ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id) - 1 AS rn
      , txt
      , dt
      , DATEDIFF(MINUTE, COALESCE(LAG(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt), dt) AS lagDiff
      , DATEDIFF(MINUTE, FIRST_VALUE(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt) AS runningDiff
FROM    wt
WHERE   dt IS NOT NULL;  -- a NULL dt would sort first, become the anchor and match nothing

WITH CTE AS (
    -- Level 1: first event of every txt group
    SELECT  id, rn, txt, dt, lagDiff, runningDiff, 1 AS Level
    FROM    #T
    WHERE   rn = 0
    UNION ALL
    -- Level n + 1: every later event at least @IntervalMinutes after a level-n event.
    -- Comparing dt directly (not DATEDIFF counts) makes the test exact and makes
    -- a gap of exactly @IntervalMinutes qualify ("at least").
    -- CTE.rn < T.rn guarantees termination even for a zero or negative interval.
    SELECT  T.id, T.rn, T.txt, T.dt, T.lagDiff, T.runningDiff, CTE.Level + 1
    FROM    #T AS T
            INNER JOIN CTE
                    ON  CTE.txt = T.txt
                    AND CTE.rn  < T.rn
                    AND T.dt   >= DATEADD(MINUTE, @IntervalMinutes, CTE.dt)
)
, X AS (
    -- The earliest row reached at each level is the event wanted for that level.
    -- NOTE: the recursion also emits many later candidates per level, so the row
    -- count can grow quickly on large groups.
    SELECT  txt
          , Level
          , MIN(rn) AS rn
    FROM    CTE
    GROUP BY txt, Level
)
SELECT  T.id, T.rn, T.txt, T.dt, T.lagDiff, T.runningDiff
FROM    X
        INNER JOIN #T AS T ON T.txt = X.txt AND T.rn = X.rn
ORDER BY T.txt, T.rn
OPTION (MAXRECURSION 0);  -- lift the default limit of 100 levels; rn strictly increases, so recursion ends

DROP TABLE #T;

输出

+----+----+-----+-------------------------+---------+-------------+
| id | rn | txt |           dt            | lagDiff | runningDiff |
+----+----+-----+-------------------------+---------+-------------+
|  1 |  0 | t1  | 2017-01-01 00:01:00.000 |       0 |           0 |
|  4 |  5 | t1  | 2017-01-01 01:04:00.000 |      12 |          63 |
|  5 |  7 | t1  | 2017-01-01 02:10:00.000 |      65 |         129 |
|  9 |  0 | t2  | 2017-01-01 00:03:00.000 |       0 |           0 |
| 10 |  1 | t2  | 2017-01-01 01:04:00.000 |      61 |          61 |
|  8 |  2 | t2  | 2017-01-01 02:10:00.000 |      66 |         127 |
+----+----+-----+-------------------------+---------+-------------+    

答案 1 :(得分:0)

我比较喜欢类似“冒泡”的方法。我发现递归操作的问题在于:它们适用于小型数据集(比如少于 5 千或 1 万行),但数据量一大,性能就会很差。出于这个原因,我喜欢游标式的方法,它本质上是在问:“你是否满足条件?是或否。插入或忽略,删除,然后继续。”这样每一项只需评估一次,且仅评估一次,而不必对递归的每一种变体都进行评估。

-- Row-by-row pass over wt in (txt, dt, id) order. For every txt group the first
-- event is kept, then each next event that lies at least @IntervalMinutes after
-- the last KEPT event of the same group. Every row is examined exactly once.

-- Minimum gap, in minutes, between two kept events of the same txt.
DECLARE @IntervalMinutes INT = 60;

-- Work queue. seq is a unique processing order and grp numbers the txt groups.
-- The loop is keyed on these rather than on wt.id, which is nullable and not
-- guaranteed unique.
DECLARE @Temp TABLE
  (
    seq INT NOT NULL PRIMARY KEY
  , grp INT NOT NULL
  , id  INT
  , dt  DATETIME
  , txt NVARCHAR(50)   -- same type as wt.txt, so values are never truncated
  );

-- Result set: kept events; Dif = minutes since the previously kept event of
-- the same txt (NULL for the first event of a group).
DECLARE @Holder TABLE
  (
    id  INT
  , dt  DATETIME
  , txt NVARCHAR(50)
  , Dif INT
  );

INSERT INTO @Temp (seq, grp, id, dt, txt)
SELECT ROW_NUMBER() OVER (ORDER BY txt, dt, id)
     , DENSE_RANK() OVER (ORDER BY txt)
     , id
     , dt
     , txt
FROM wt
WHERE dt IS NOT NULL;  -- rows without a timestamp cannot be placed on the timeline

-- Declared once, outside the loop: T-SQL variables are batch-scoped, so a
-- DECLARE inside WHILE does not reset them on each iteration anyway.
DECLARE
    @CurSeq   INT
  , @CurGrp   INT
  , @CurId    INT
  , @CurDt    DATETIME
  , @Curtxt   NVARCHAR(50)
  , @LastGrp  INT        -- group of the last kept event
  , @LastDate DATETIME   -- dt of the last kept event
  ;

WHILE EXISTS (SELECT 1 FROM @Temp)
BEGIN
    SELECT TOP (1)
           @CurSeq = seq, @CurGrp = grp, @CurId = id, @CurDt = dt, @Curtxt = txt
    FROM @Temp
    ORDER BY seq;

    -- First row overall, or first row of a new txt group: always keep it.
    IF @LastGrp IS NULL OR @CurGrp <> @LastGrp
    BEGIN
        INSERT INTO @Holder (id, dt, txt, Dif) VALUES (@CurId, @CurDt, @Curtxt, NULL);
        SET @LastGrp  = @CurGrp;
        SET @LastDate = @CurDt;
    END
    -- Same group: keep the row only when the full interval has elapsed since the
    -- last kept event. Comparing the datetimes directly is exact, whereas
    -- DATEDIFF(HOUR, ...) only counts crossed hour boundaries (00:52 -> 01:04 = 1).
    ELSE IF @CurDt >= DATEADD(MINUTE, @IntervalMinutes, @LastDate)
    BEGIN
        INSERT INTO @Holder (id, dt, txt, Dif)
        VALUES (@CurId, @CurDt, @Curtxt, DATEDIFF(MINUTE, @LastDate, @CurDt));
        SET @LastDate = @CurDt;
    END

    -- Remove the processed row and loop again.
    DELETE FROM @Temp WHERE seq = @CurSeq;
END

SELECT id, dt, txt, Dif
FROM @Holder
ORDER BY txt, dt;