基于时间窗口对 DateTime 值进行聚类(分组)

时间:2014-11-07 10:48:03

标签: sql-server sql-server-2008 tsql sql-server-2008-r2

我的目标是在 SQL Server 2008 R2 中,把同一个类(Class)内落在同一时间窗口(例如 10 秒)里的日期时间值归入同一个子组(每个类可以有 1 到 n 个子组)。这是我的第一次尝试:

-- Width, in seconds, of the time window used to cluster DT values.
DECLARE @TimeWindowInSeconds INT = 10

-- Re-create the session-local sample table so the script can be re-run.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp
CREATE TABLE #Temp ( Class INT, DT DATETIME )

-- Sample readings: one row per (Class, DT).
-- The literals use the ISO 8601 form 'YYYY-MM-DDThh:mm:ss', which SQL Server
-- parses identically under every SET LANGUAGE / SET DATEFORMAT setting.
-- The space-separated form 'YYYY-MM-DD hh:mm:ss' is read as year-day-month
-- for DATETIME when the session date format is dmy.
INSERT  INTO #Temp
        ( Class, DT )
VALUES  ( 1, '2014-11-05T10:55:00' ),
        ( 1, '2014-11-05T10:55:01' ),
        ( 1, '2014-11-05T10:55:02' ),
        ( 1, '2014-11-05T10:55:03' ),
        ( 1, '2014-11-05T10:55:04' ),
        ( 1, '2014-11-05T10:55:05' ),
        ( 1, '2014-11-05T10:55:06' ),
        ( 1, '2014-11-05T10:55:07' ),
        ( 1, '2014-11-05T10:55:08' ),
        ( 1, '2014-11-05T10:55:09' ),
        ( 1, '2014-11-05T10:55:10' ),
        ( 1, '2014-11-05T10:55:11' ),
        -- one hour later
        ( 1, '2014-11-05T11:55:12' ),
        ( 1, '2014-11-05T11:55:13' ),
        ( 1, '2014-11-05T11:55:14' ),
        ( 1, '2014-11-05T11:55:15' ),
        ( 1, '2014-11-05T11:55:16' ),
        ( 1, '2014-11-05T11:55:17' ),
        ( 1, '2014-11-05T11:55:18' ),
        ( 1, '2014-11-05T11:55:19' ),
        -- there is no 11:55:20 reading
        ( 1, '2014-11-05T11:55:21' ),
        ( 1, '2014-11-05T11:55:22' ),
        ( 1, '2014-11-05T11:55:23' ),
        ( 1, '2014-11-05T11:55:24' ),
        ( 1, '2014-11-05T11:55:25' ),
        ( 1, '2014-11-05T11:55:26' ),
        ( 1, '2014-11-05T11:55:27' ),
        -- second class
        ( 2, '2014-11-05T10:55:10' ),
        ( 2, '2014-11-05T10:55:11' );

-- First attempt at the clustering.
-- A row starts a new subgroup when the row immediately before it (in Class, DT
-- order) is not in the same Class or is @TimeWindowInSeconds or more seconds
-- earlier. Only adjacent rows are compared, so a run of closely spaced rows
-- stays in one subgroup however long the run is.
;WITH OrderedRows AS (
    -- Number every row once, ordered by Class and then DT.
    SELECT  Class,
            DT,
            ROW_NUMBER() OVER ( ORDER BY Class, DT ) AS RowNumber
    FROM    #Temp
),
FlaggedRows AS (
    -- cur is the successor, prev is the row numbered directly before it.
    SELECT  cur.Class,
            cur.RowNumber,
            prev.RowNumber AS RowNumber1,
            cur.DT,
            prev.DT AS DT2,
            DATEDIFF(SECOND, prev.DT, cur.DT) AS DifferenceInSeconds,
            -- 1 when no qualifying predecessor was joined, otherwise NULL
            CASE WHEN prev.DT IS NULL THEN 1 END AS z
    FROM    OrderedRows AS cur
    LEFT JOIN OrderedRows AS prev
           ON prev.RowNumber + 1 = cur.RowNumber
          AND prev.Class = cur.Class
          AND DATEDIFF(SECOND, prev.DT, cur.DT) < @TimeWindowInSeconds
),
NumberedRows AS (
    -- SubGroup = how many flagged rows of the same Class lie at or before this DT.
    SELECT  f.Class,
            f.DT,
            ( SELECT SUM(s.z)
              FROM   FlaggedRows AS s
              WHERE  s.Class = f.Class
                     AND s.DT <= f.DT ) AS SubGroup
    FROM    FlaggedRows AS f
),
RepeatedKeys AS (
    -- 'Class-SubGroup' keys that occur on more than one row.
    SELECT  CAST(n.Class AS NVARCHAR(100)) + '-'
            + CAST(n.SubGroup AS NVARCHAR(100)) AS GroupKey
    FROM    NumberedRows AS n
    GROUP BY n.Class,
            n.SubGroup
    HAVING  COUNT(*) > 1
)
SELECT  r.Class,
        r.DT,
        r.SubGroup
FROM    NumberedRows AS r
WHERE   CAST(r.Class AS NVARCHAR(100)) + '-'
        + CAST(r.SubGroup AS NVARCHAR(100)) IN ( SELECT k.GroupKey
                                                 FROM   RepeatedKeys AS k )
ORDER BY r.Class,
        r.DT

不幸的是,它会产生错误的结果:

Class   DT  SubGroup
1   2014-11-05 10:55:00.000 1
1   2014-11-05 10:55:01.000 1
1   2014-11-05 10:55:02.000 1
1   2014-11-05 10:55:03.000 1
1   2014-11-05 10:55:04.000 1
1   2014-11-05 10:55:05.000 1
1   2014-11-05 10:55:06.000 1
1   2014-11-05 10:55:07.000 1
1   2014-11-05 10:55:08.000 1
1   2014-11-05 10:55:09.000 1
1   2014-11-05 10:55:10.000 1
1   2014-11-05 10:55:11.000 1
1   2014-11-05 11:55:12.000 2
1   2014-11-05 11:55:13.000 2
1   2014-11-05 11:55:14.000 2
1   2014-11-05 11:55:15.000 2
1   2014-11-05 11:55:16.000 2
1   2014-11-05 11:55:17.000 2
1   2014-11-05 11:55:18.000 2
1   2014-11-05 11:55:19.000 2
1   2014-11-05 11:55:21.000 2
1   2014-11-05 11:55:22.000 2
1   2014-11-05 11:55:23.000 2
1   2014-11-05 11:55:24.000 2
1   2014-11-05 11:55:25.000 2
1   2014-11-05 11:55:26.000 2
1   2014-11-05 11:55:27.000 2
2   2014-11-05 10:55:10.000 1
2   2014-11-05 10:55:11.000 1

正确的结果应该是(参见CorrectSubGroup):

Class   DT  SubGroup    CorrectSubGroup
1   05/11/2014 10:55:00 1   1
1   05/11/2014 10:55:01 1   1
1   05/11/2014 10:55:02 1   1
1   05/11/2014 10:55:03 1   1
1   05/11/2014 10:55:04 1   1
1   05/11/2014 10:55:05 1   1
1   05/11/2014 10:55:06 1   1
1   05/11/2014 10:55:07 1   1
1   05/11/2014 10:55:08 1   1
1   05/11/2014 10:55:09 1   1
1   05/11/2014 10:55:10 1   2
1   05/11/2014 10:55:11 1   2
1   05/11/2014 11:55:12 2   2
1   05/11/2014 11:55:13 2   2
1   05/11/2014 11:55:14 2   2
1   05/11/2014 11:55:15 2   2
1   05/11/2014 11:55:16 2   2
1   05/11/2014 11:55:17 2   2
1   05/11/2014 11:55:18 2   2
1   05/11/2014 11:55:19 2   2
1   05/11/2014 11:55:21 2   3
1   05/11/2014 11:55:22 2   3
1   05/11/2014 11:55:23 2   3
1   05/11/2014 11:55:24 2   3
1   05/11/2014 11:55:25 2   3
1   05/11/2014 11:55:26 2   3
1   05/11/2014 11:55:27 2   3
2   05/11/2014 10:55:10 1   1
2   05/11/2014 10:55:11 1   1

非常感谢任何帮助。

2 个答案:

答案 0 :(得分:1)

这应该可以解决问题,希望有所帮助。

DECLARE @TimeWindowInSeconds INT
DECLARE @TimePartitioner INT

-- Define the time window (e.g. 10 seconds)
SET @TimeWindowInSeconds = 10

-- Number of windows per minute. This is integer division, so any remainder
-- is discarded when the window does not divide 60 evenly.
SET @TimePartitioner = 60 / @TimeWindowInSeconds;

-- Re-create the sample table so the script can be re-run.
-- NOTE: ##Temp is a global temporary table and is visible to other sessions.
IF OBJECT_ID('tempdb..##Temp') IS NOT NULL
    DROP TABLE ##Temp
CREATE TABLE ##Temp ( Class INT, DT DATETIME )

-- Sample readings: one row per (Class, DT).
-- The literals use the ISO 8601 form 'YYYY-MM-DDThh:mm:ss', which SQL Server
-- parses identically under every SET LANGUAGE / SET DATEFORMAT setting.
-- The space-separated form 'YYYY-MM-DD hh:mm:ss' is read as year-day-month
-- for DATETIME when the session date format is dmy.
INSERT  INTO ##Temp
        ( Class, DT )
VALUES  ( 1, '2014-11-05T10:55:00' ),
        ( 1, '2014-11-05T10:55:01' ),
        ( 1, '2014-11-05T10:55:02' ),
        ( 1, '2014-11-05T10:55:03' ),
        ( 1, '2014-11-05T10:55:04' ),
        ( 1, '2014-11-05T10:55:05' ),
        ( 1, '2014-11-05T10:55:06' ),
        ( 1, '2014-11-05T10:55:07' ),
        ( 1, '2014-11-05T10:55:08' ),
        ( 1, '2014-11-05T10:55:09' ),
        ( 1, '2014-11-05T10:55:10' ),
        ( 1, '2014-11-05T10:55:11' ),
        -- one hour later
        ( 1, '2014-11-05T11:55:12' ),
        ( 1, '2014-11-05T11:55:13' ),
        ( 1, '2014-11-05T11:55:14' ),
        ( 1, '2014-11-05T11:55:15' ),
        ( 1, '2014-11-05T11:55:16' ),
        ( 1, '2014-11-05T11:55:17' ),
        ( 1, '2014-11-05T11:55:18' ),
        ( 1, '2014-11-05T11:55:19' ),
        -- there is no 11:55:20 reading
        ( 1, '2014-11-05T11:55:21' ),
        ( 1, '2014-11-05T11:55:22' ),
        ( 1, '2014-11-05T11:55:23' ),
        ( 1, '2014-11-05T11:55:24' ),
        ( 1, '2014-11-05T11:55:25' ),
        ( 1, '2014-11-05T11:55:26' ),
        ( 1, '2014-11-05T11:55:27' ),
        -- second class
        ( 2, '2014-11-05T10:55:10' ),
        ( 2, '2014-11-05T10:55:11' );

-- Label every reading with the time-window bucket that its seconds component
-- falls into, counted from the start of the minute:
--   seconds 0 .. w-1 -> 1,  w .. 2w-1 -> 2,  ...   (w = @TimeWindowInSeconds)
-- With w = 10 this yields buckets 1..6.
--
-- The bucket is computed directly from DT with integer division, so:
--   * no per-second lookup set has to be generated and joined;
--   * DT is never converted to a string and back, which keeps the result
--     independent of the session's language and date-format settings;
--   * readings with a non-zero millisecond part are labelled like any other
--     row, because nothing is matched by exact DATETIME equality;
--   * every bucket is exactly w seconds wide. When w does not divide 60, only
--     the last bucket of the minute is shorter.
--
-- Output columns: Class, DT, SubGroup. Every row of ##Temp is returned once.
SELECT   tp.Class,
         tp.DT,
         DATEPART(SECOND, tp.DT) / @TimeWindowInSeconds + 1 AS SubGroup
FROM     ##Temp AS tp
ORDER BY tp.Class,
         tp.DT

答案 1 :(得分:0)

试试这个..

-- Label each reading with its 10-second bucket within the minute
-- (seconds 0-9 -> 1, 10-19 -> 2, ..., 50-59 -> 6).
-- The literal 10 is the window width in seconds.
--
-- The computed column is named SubGroup so the result can be consumed by
-- name (in a CTE, derived table or SELECT ... INTO).
-- Rows are returned one per source row: there is no aggregate, so a GROUP BY
-- would only collapse duplicate (Class, DT) readings.
-- ORDER BY makes the output order deterministic.
SELECT   Class,
         DT,
         DATEPART(SECOND, DT) / 10 + 1 AS SubGroup
FROM     #Temp
ORDER BY Class,
         DT